dispel.processing.data_set module#

Data set processing functionalities.

class dispel.processing.data_set.DataSetProcessingStep[source]#

Bases: DataSetProcessingStepMixin, LevelProcessingStep

A processing step that processes data sets.

Parameters:
__init__(data_set_ids=None, level_filter=None)[source]#
Parameters:
assert_valid_data_sets(data_sets, level, reading, **kwargs)[source]#

Perform assertions that the given data sets can be processed.

Parameters:
flag_data_sets(data_sets, level, reading, **kwargs)[source]#

Flag the provided data sets.

Parameters:
Return type:

Generator[Flag, None, None]

process_data_sets(data_sets, level, reading, **kwargs)[source]#

Process the provided data sets.

Parameters:
Return type:

Generator[ProcessingResult | ProcessingControlResult, None, None]

process_level(level, reading, **kwargs)[source]#

Process the provided Level.

Parameters:
Return type:

Generator[ProcessingResult | ProcessingControlResult, None, None]

class dispel.processing.data_set.DataSetProcessingStepMixin[source]#

Bases: TaskMixin, DataSetProcessingStepProtocol, LevelProcessingStepProtocol

A mixin class that processes data sets.

__init__(*args, **kwargs)[source]#
assert_valid_level(level, reading, **kwargs)[source]#

Assert that the level has the appropriate valid data sets.

Parameters:
data_set_ids: str | Iterable[str]#

The id, or an iterable of ids, of the data sets to be processed

get_data_frames(level)[source]#

Get the raw data from all data sets in question.

Parameters:

level (Level) – The level from which to get the data sets.

Returns:

A list of all raw data frames with the specified ids.

Return type:

List[pandas.DataFrame]

get_data_set_ids()[source]#

Get the data set ids to be processed.

Return type:

Iterable[str]

get_raw_data_sets(level)[source]#

Get the raw data sets from all data sets in question.

Parameters:

level (Level) – The level from which to get the data sets.

Returns:

A list of all raw data sets with the specified ids.

Return type:

List[RawDataSet]

class dispel.processing.data_set.DataSetProcessingStepProtocol[source]#

Bases: object

Abstract class for data set processing steps.

abstract assert_valid_data_sets(data_sets, level, reading, **kwargs)[source]#

Assert that the data sets to be processed are valid.

Parameters:
abstract flag_data_sets(data_sets, level, reading, **kwargs)[source]#

Flag the provided data sets.

Parameters:
Return type:

Generator[Flag, None, None]

abstract get_data_frames(level)[source]#

Get the raw data from all data sets in question.

Parameters:

level (Level) – The level from which to get the data sets.

Returns:

A list of all raw data frames with the specified ids.

Return type:

List[pandas.DataFrame]

abstract get_data_set_ids()[source]#

Get the data set ids to be processed.

Return type:

Iterable[str]

get_data_sets_flag_targets(data_sets, level, reading, **kwargs)[source]#

Get the flag targets for data sets flagging.

Parameters:
  • data_sets (Sequence[DataFrame]) – The data sets to be flagged.

  • level (Level) – The level associated with the data sets.

  • reading (Reading) – The reading associated with the data set flag.

  • kwargs – Additional keyword arguments that may be used for flag target extraction.

Returns:

An iterable of entities that are flagged.

Return type:

Iterable[EntityType]

abstract get_raw_data_sets(level)[source]#

Get the raw data sets from all data sets in question.

Parameters:

level (Level) – The level from which to get the data sets.

Returns:

A list of all raw data sets with the specified ids.

Return type:

List[RawDataSet]

abstract process_data_sets(data_sets, level, reading, **kwargs)[source]#

Process the provided data sets.

Parameters:
  • data_sets (Sequence[DataFrame]) – The data sets to be processed.

  • level (Level) – The level to be processed.

  • reading (Reading) – The reading to be processed.

  • kwargs – Additional arguments passed by process_level().

Yields:

ProcessResultType – Results from processing the data sets.

Return type:

Generator[ProcessingResult | ProcessingControlResult, None, None]

class dispel.processing.data_set.FlagDataSetStep[source]#

Bases: FlagStepMixin, DataSetProcessingStep

A data set flag class.

Parameters:

Examples

Assuming you want to flag the accelerometer signal data of the U-Turn task to verify that it doesn’t exceed a certain threshold, you can use the following flag step:

>>> from dispel.data.values import AbbreviatedValue as AV
>>> from dispel.processing.data_set import FlagDataSetStep
>>> step = FlagDataSetStep(
...     data_set_ids = 'accelerometer',
...     level_filter = 'utt',
...     task_name = AV('U-Turn test', 'utt'),
...     flag_name = AV('accelerometer signal threshold', 'ast'),
...     flag_type = FlagType.TECHNICAL,
...     flag_severity = FlagSeverity.INVALIDATION,
...     reason = 'The U-Turn accelerometer signal exceeds 50 m/s^2.',
...     stop_processing=False,
...     flagging_function=lambda data: data.max().max() < 50.
... )

The flagging function will be called with the data sets given in the data_set_ids argument, taken from the level 'utt' as specified in the level_filter argument. If the function has a named parameter matching reading, the reading will be passed to the flagging function.

Another common scenario is to define a class that can be reused.

>>> from dispel.data.flags import FlagType
>>> from dispel.processing.data_set import FlagDataSetStep
>>> class UTTAccelerometerSignal(FlagDataSetStep):
...     data_set_ids = 'accelerometer'
...     level_filter = 'utt'
...     task_name = AV('U-Turn test', 'utt')
...     flag_name = AV('u-turn duration', 'utt_dur')
...     flag_type = FlagType.TECHNICAL
...     flag_severity = FlagSeverity.INVALIDATION
...     reason = 'The U-Turn accelerometer signal exceeds 50 m/s^2.'
...     stop_processing = True
...     flagging_function = lambda data: data.max().max() < 50

Another convenient way to provide the flagging function is to use the @flag decorator. One can also define multiple flags in the same class and use multiple data sets. Below is an example that flags one data set (userInput) while using several data sets (userInput, screen) in the flagging function.

>>> import pandas as pd
>>> from dispel.processing.flags import flag
>>> from dispel.processing.level import FlagLevelStep
>>> class UTTAccelerometerSignal(FlagDataSetStep):
...     data_set_ids = ['userInput', 'screen']
...     target_ids = 'userInput'
...     level_filter = 'cps'
...     task_name = AV('Cognitive processing speed test', 'cps')
...     flag_name = AV('answer timestamps', 'at')
...     flag_type = FlagType.TECHNICAL
...     flag_severity = FlagSeverity.INVALIDATION
...     reason = 'The user answer timestamps do not match the screen info.'
...     stop_processing = False
...
...     @flag
...     def _timestamps(
...         self,
...         user_input: pd.DataFrame,
...         screen: pd.DataFrame
...     ) -> bool:
...         return list(user_input.ts) == list(screen.ts)

Note that the @flag decorator can take keyword arguments. These kwargs are merged with any keyword arguments that come from processing step groups in order to format the flag reason.

__init__(data_set_ids=None, level_filter=None, task_name=None, flag_name=None, flag_type=None, flag_severity=None, reason=None, stop_processing=False, flagging_function=None, target_ids=None)[source]#
Parameters:
flag_data_sets(data_sets, level, reading, **kwargs)[source]#

Flag the provided data sets.

Parameters:
Return type:

Generator[Flag, None, None]

get_data_sets_flag_targets(data_sets, level, reading, **kwargs)[source]#

Get flag targets for data sets flagging.

Parameters:
Return type:

Iterable[Reading | Level | RawDataSet | MeasureValue | LevelEpoch]

get_flag_targets(reading, level=None, **kwargs)[source]#

Get flag targets for data set flagging.

Parameters:
Return type:

Iterable[Reading | Level | RawDataSet | MeasureValue | LevelEpoch]

get_target_ids()[source]#

Get the ids of the target data sets to be flagged.

Returns:

The identifiers of the target data sets.

Return type:

str

process_data_sets(data_sets, level, reading, **kwargs)[source]#

Process the provided data sets.

Parameters:
Return type:

Generator[ProcessingResult | ProcessingControlResult, None, None]

target_ids: str | Iterable[str] | None = None#
class dispel.processing.data_set.MutateDataSetProcessingStepBase[source]#

Bases: DataSetProcessingStep

A base class for transformation and extraction steps.

Parameters:
__init__(data_set_ids=None, transform_function=None, level_filter=None)[source]#
Parameters:
get_transform_function()[source]#

Get the transformation function.

Return type:

Callable[[...], Any] | None

get_transform_functions()[source]#

Get all transformation functions associated with this step.

Return type:

Generator[Tuple[Callable, Dict[str, Any]], None, None]

process_data_sets(data_sets, level, reading, **kwargs)[source]#

Process the provided data sets.

Parameters:
Return type:

Generator[ProcessingResult | ProcessingControlResult, None, None]

transform_function = None#

The function to be applied to the data sets.

abstract wrap_result(res, level, reading, **kwargs)[source]#

Wrap the transformation result into a processing result.

Parameters:
Return type:

Generator[LevelProcessingResult | RawDataSetProcessingResult, None, None]

class dispel.processing.data_set.RawDataSetProcessingResult[source]#

Bases: LevelProcessingResult

The processing result of a transform step.

__init__(level, step, sources, result, concatenate=False, overwrite=False)#
Parameters:
Return type:

None

concatenate: bool = False#

Whether to concatenate the result if it already exists in the given level

overwrite: bool = False#

Whether to overwrite the result if it already exists in the given level

class dispel.processing.data_set.StorageError[source]#

Bases: Enum

Raw data set storage handler.

CONCATENATE = 'concatenate'#
IGNORE = 'ignore'#
OVERWRITE = 'overwrite'#
RAISE = 'raise'#
property concatenate: bool#

Return True if the handling is to concatenate.

property overwrite: bool#

Return True if the handling is to overwrite.

dispel.processing.data_set.decorated_processing_function(func, data_sets, reading, level, **kwargs)[source]#

Decorate functions for processing steps.

Pass reading and/or level as arguments if they are present in the function's parameters.

Parameters:
  • func (Callable[[...], Any]) – The processing function to be decorated.

  • data_sets (Sequence[DataFrame]) – The data frames on which the processing function is to be applied.

  • reading (Reading) – The corresponding reading.

  • level (Level) – The corresponding level.

  • kwargs – Additional keyword arguments to be passed to the processing function.

Returns:

The output of the given processing function.

Return type:

Any

dispel.processing.data_set.transformation(_func=None, **kwargs)[source]#

Decorate a function as a transformation function.