dispel.processing.data_set module#

Data set processing functionalities.

class dispel.processing.data_set.DataSetProcessingStep[source]#

Bases: DataSetProcessingStepMixin, LevelProcessingStep

A processing step that processes data sets.

Parameters:

data_set_ids (str | Iterable[str]) – Optional data set ids to be processed. See DataSetProcessingStepMixin.
level_filter (dispel.processing.level.LevelFilter) – Optional level filter. See LevelProcessingStep.

__init__(data_set_ids=None, level_filter=None)[source]#

Parameters:

data_set_ids (str | Iterable[str] | None) –
level_filter (str | LevelId | List[str] | List[LevelId] | LevelFilter | None) –

assert_valid_data_sets(data_sets, level, reading, **kwargs)[source]#

Perform assertions that the given data sets can be processed.

Parameters:

data_sets (Sequence[DataFrame]) –
level (Level) –
reading (Reading) –

flag_data_sets(data_sets, level, reading, **kwargs)[source]#

Flag the provided data sets.

Parameters:

data_sets (Sequence[DataFrame]) –
level (Level) –
reading (Reading) –

Return type:

Generator[Flag, None, None]

process_data_sets(data_sets, level, reading, **kwargs)[source]#

Process the provided data sets.

Parameters:

data_sets (Sequence[DataFrame]) –
level (Level) –
reading (Reading) –

Return type:

Generator[ProcessingResult | ProcessingControlResult, None, None]

process_level(level, reading, **kwargs)[source]#

Process the provided Level.

Parameters:

level (Level) –
reading (Reading) –

Return type:

Generator[ProcessingResult | ProcessingControlResult, None, None]

class dispel.processing.data_set.DataSetProcessingStepMixin[source]#

Bases: TaskMixin, DataSetProcessingStepProtocol, LevelProcessingStepProtocol

A mixin class that processes data sets.

__init__(*args, **kwargs)[source]#

assert_valid_level(level, reading, **kwargs)[source]#

Assert that the level has the appropriate valid data sets.

Parameters:

level (Level) –
reading (Reading) –

data_set_ids: str | Iterable[str]#: An iterable of data sets to be processed

get_data_frames(level)[source]#

Get the raw data from all data sets in question.

Parameters: level (Level) – The level from which to get the data sets.
Returns: A list of all raw data frames with the specified ids.
Return type: List[pandas.DataFrame]

get_data_set_ids()[source]#

Get the data set ids to be processed.

Return type: Iterable[str]

get_raw_data_sets(level)[source]#

Get the raw data sets from all data sets in question.

Parameters: level (Level) – The level from which to get the data sets.
Returns: A list of all raw data sets with the specified ids.
Return type: List[RawDataSet]

class dispel.processing.data_set.DataSetProcessingStepProtocol[source]#

Bases: object

Abstract class for data set processing steps.

abstract assert_valid_data_sets(data_sets, level, reading, **kwargs)[source]#

Assert that the data sets to be processed are valid.

Parameters:

data_sets (Sequence[DataFrame]) –
level (Level) –
reading (Reading) –

abstract flag_data_sets(data_sets, level, reading, **kwargs)[source]#

Flag the provided data sets.

Parameters:

data_sets (Sequence[DataFrame]) –
level (Level) –
reading (Reading) –

Return type:

Generator[Flag, None, None]

abstract get_data_frames(level)[source]#

Get the raw data from all data sets in question.

Parameters: level (Level) – The level from which to get the data sets.
Returns: A list of all raw data frames with the specified ids.
Return type: List[pandas.DataFrame]

abstract get_data_set_ids()[source]#

Get the data set ids to be processed.

Return type: Iterable[str]

get_data_sets_flag_targets(data_sets, level, reading, **kwargs)[source]#

Get the level flag targets.

Parameters:

data_sets (Sequence[DataFrame]) – The data sets to be flagged.
level (Level) – The level associated with the data sets.
reading (Reading) – The reading associated with the data set flag.
kwargs – Additional keyword arguments possibly used for flag target extraction.

Returns:

An iterable of entities that are flagged.

Return type:

Iterable[EntityType]

abstract get_raw_data_sets(level)[source]#

Get the raw data sets from all data sets in question.

Parameters: level (Level) – The level from which to get the data sets.
Returns: A list of all raw data sets with the specified ids.
Return type: List[RawDataSet]

abstract process_data_sets(data_sets, level, reading, **kwargs)[source]#

Process the provided data sets.

Parameters:

data_sets (Sequence[DataFrame]) – The data sets to be processed.
level (Level) – The level to be processed.
reading (Reading) – The reading to be processed.
kwargs – Additional arguments passed by process_level().

Yields:

ProcessResultType – Results from processing the data sets.

Return type:

Generator[ProcessingResult | ProcessingControlResult, None, None]

class dispel.processing.data_set.FlagDataSetStep[source]#

Bases: FlagStepMixin, DataSetProcessingStep

A data set flag class.

Parameters:

data_set_ids (str | Iterable[str]) – An optional id or iterable of ids for raw data set(s) to be used for the flag. See DataSetProcessingStepMixin.
level_filter (dispel.processing.level.LevelFilter) – An optional filter to limit the levels being processed. See LevelProcessingStep.
task_name (dispel.data.values.AbbreviatedValue | str) – An optional abbreviated name value of the task used for the flag. See FlagStepMixin.
flag_name (dispel.data.values.AbbreviatedValue | str) – An optional abbreviated name value of the considered flag. See FlagStepMixin.
flag_type (dispel.data.flags.FlagType | str) – An optional flag type. See FlagType.
flag_severity (dispel.data.flags.FlagSeverity | str) – An optional flag severity. See FlagSeverity.
reason (str) – An optional string reason of the considered flag. See FlagStepMixin.
stop_processing (bool) – An optional boolean that specifies whether the flag stops the processing, i.e., whether it raises an error or not. See FlagStepMixin.
flagging_function (Callable[[...], bool] | None) – An optional flagging function to be applied to the pandas data frames of the provided raw data sets. See FlagStepMixin.
target_ids (str | Iterable[str] | None) – Optional id(s) of the target data sets to be flagged. If the user doesn’t specify the targets, then the targets will automatically be the data sets used.

Examples

Assuming you want to flag the accelerometer signal data of the U-Turn task to verify that it doesn’t exceed a certain threshold, you can use the following flag step:

>>> from dispel.data.flags import FlagSeverity, FlagType
>>> from dispel.data.values import AbbreviatedValue as AV
>>> from dispel.processing.data_set import FlagDataSetStep
>>> step = FlagDataSetStep(
...     data_set_ids = 'accelerometer',
...     level_filter = 'utt',
...     task_name = AV('U-Turn test', 'utt'),
...     flag_name = AV('accelerometer signal threshold', 'ast'),
...     flag_type = FlagType.TECHNICAL,
...     flag_severity = FlagSeverity.INVALIDATION,
...     reason = 'The U-Turn accelerometer signal exceeds 50 m/s^2.',
...     stop_processing=False,
...     flagging_function=lambda data: data.max().max() < 50.
... )

The flagging function will be called with the data frames of the specified data sets for the level 'utt', as specified in the level_filter argument. If the function has a named parameter matching reading or level, the reading or level will be passed to the flagging function.

Another common scenario is to define a class that can be reused.

>>> from dispel.data.flags import FlagType
>>> from dispel.processing.data_set import FlagDataSetStep
>>> class UTTAccelerometerSignal(FlagDataSetStep):
...     data_set_ids = 'accelerometer'
...     level_filter = 'utt'
...     task_name = AV('U-Turn test', 'utt')
...     flag_name = AV('u-turn duration', 'utt_dur')
...     flag_type = FlagType.TECHNICAL
...     flag_severity = FlagSeverity.INVALIDATION
...     reason = 'The U-Turn accelerometer signal exceeds 50 m/s^2.'
...     stop_processing = True
...     flagging_function = lambda data: data.max().max() < 50

Another convenient way to provide the flagging function is to use the @flag decorator. One can also use multiple flags for the same class as well as multiple data sets. Below is an example of flagging one data set (userInput) while using multiple data sets in the flagging function (userInput, screen).

>>> import pandas as pd
>>> from dispel.processing.flags import flag
>>> from dispel.processing.level import FlagLevelStep
>>> class UTTAccelerometerSignal(FlagDataSetStep):
...     data_set_ids = ['userInput', 'screen']
...     target_ids = 'userInput'
...     level_filter = 'cps'
...     task_name = AV('Cognitive processing speed test', 'cps')
...     flag_name = AV('answer timestamps', 'at')
...     flag_type = FlagType.TECHNICAL
...     flag_severity = FlagSeverity.INVALIDATION
...     reason = 'The user answer timestamps do not match the screen info.'
...     stop_processing = False
...
...     @flag
...     def _timestamps(
...         self,
...         user_input: pd.DataFrame,
...         screen: pd.DataFrame
...     ) -> bool:
...         return list(user_input.ts) == list(screen.ts)

Note that the @flag decorator can take keyword arguments. These kwargs are merged with any keyword arguments that come from processing step groups in order to format the flag reason.

__init__(data_set_ids=None, level_filter=None, task_name=None, flag_name=None, flag_type=None, flag_severity=None, reason=None, stop_processing=False, flagging_function=None, target_ids=None)[source]#

Parameters:

data_set_ids (str | Iterable[str] | None) –
level_filter (str | LevelId | List[str] | List[LevelId] | LevelFilter | None) –
task_name (str | AbbreviatedValue | None) –
flag_name (str | AbbreviatedValue | None) –
flag_type (FlagType | str | None) –
flag_severity (FlagSeverity | str | None) –
reason (str | AbbreviatedValue | None) –
stop_processing (bool) –
flagging_function (Callable[[...], bool] | None) –
target_ids (str | Iterable[str] | None) –

flag_data_sets(data_sets, level, reading, **kwargs)[source]#

Flag the provided data sets.

Parameters:

data_sets (Sequence[DataFrame]) –
level (Level) –
reading (Reading) –

Return type:

Generator[Flag, None, None]

get_data_sets_flag_targets(data_sets, level, reading, **kwargs)[source]#

Get flag targets for data sets flagging.

Parameters:

data_sets (Sequence[DataFrame]) –
level (Level) –
reading (Reading) –

Return type:

Iterable[Reading | Level | RawDataSet | MeasureValue | LevelEpoch]

get_flag_targets(reading, level=None, **kwargs)[source]#

Get flag targets for data set flagging.

Parameters:

reading (Reading) –
level (Level | None) –

Return type:

Iterable[Reading | Level | RawDataSet | MeasureValue | LevelEpoch]

get_target_ids()[source]#

Get the ids of the target data sets to be flagged.

Returns: The identifiers of the target data sets.
Return type: str

process_data_sets(data_sets, level, reading, **kwargs)[source]#

Process the provided data sets.

Parameters:

data_sets (Sequence[DataFrame]) –
level (Level) –
reading (Reading) –

Return type:

Generator[ProcessingResult | ProcessingControlResult, None, None]

target_ids: str | Iterable[str] | None = None#

class dispel.processing.data_set.MutateDataSetProcessingStepBase[source]#

Bases: DataSetProcessingStep

A base class for transformation and extraction steps.

Parameters:

data_set_ids (str | Iterable[str]) – An optional list of data set ids to be used for the transformation. See DataSetProcessingStepMixin.
transform_function – An optional function to be applied to the data sets. If no function is passed, the class variable transform_function will be used. Alternatively, get_transform_function() can be overridden to provide the transformation function. If there is more than one function to be applied, one can override get_transform_functions(). Otherwise, all class functions decorated with @transformation will be considered as transformation functions.
level_filter (dispel.processing.level.LevelFilter) – An optional filter to limit the levels being processed. See LevelProcessingStep.

__init__(data_set_ids=None, transform_function=None, level_filter=None)[source]#

Parameters:

data_set_ids (str | Iterable[str] | None) –
transform_function (Callable[[...], Any] | None) –
level_filter (str | LevelId | List[str] | List[LevelId] | LevelFilter | None) –

get_transform_function()[source]#

Get the transformation function.

Return type: Callable[[...], Any] | None

get_transform_functions()[source]#

Get all transformation functions associated with this step.

Return type: Generator[Tuple[Callable, Dict[str, Any]], None, None]

process_data_sets(data_sets, level, reading, **kwargs)[source]#

Process the provided data sets.

Parameters:

data_sets (Sequence[DataFrame]) –
level (Level) –
reading (Reading) –

Return type:

Generator[ProcessingResult | ProcessingControlResult, None, None]

transform_function = None#: The function to be applied to the data sets.

abstract wrap_result(res, level, reading, **kwargs)[source]#

Wrap the transformation result into a processing result.

Parameters:

res (Any) –
level (Level) –
reading (Reading) –
kwargs (Any) –

Return type:

Generator[LevelProcessingResult | RawDataSetProcessingResult, None, None]

class dispel.processing.data_set.RawDataSetProcessingResult[source]#

Bases: LevelProcessingResult

The processing result of a transform step.

__init__(level, step, sources, result, concatenate=False, overwrite=False)#

Parameters:

level (Level) –
step (ProcessingStep) –
sources (Iterable[Reading | Level | RawDataSet | MeasureValue | LevelEpoch] | Reading | Level | RawDataSet | MeasureValue | LevelEpoch) –
result (Level | MeasureValue | MeasureSet | LevelEpoch | RawDataSet) –
concatenate (bool) –
overwrite (bool) –

Return type:

None

concatenate: bool = False#: Whether to concatenate the result if it already exists in the given level

overwrite: bool = False#: Whether to overwrite the result if it already exists in the given level

class dispel.processing.data_set.StorageError[source]#

Bases: Enum

Raw data set storage handler.

CONCATENATE = 'concatenate'#

IGNORE = 'ignore'#

OVERWRITE = 'overwrite'#

RAISE = 'raise'#

property concatenate: bool#: Return True if the handling is to concatenate.

property overwrite: bool#: Return True if the handling is to overwrite.

dispel.processing.data_set.decorated_processing_function(func, data_sets, reading, level, **kwargs)[source]#

Decorate functions for processing steps.

Pass reading and/or level as arguments if they are present in the function’s parameters.

Parameters:

func (Callable[[...], Any]) – The processing function to be decorated.
data_sets (Sequence[DataFrame]) – The data frames on which the processing function is to be applied.
reading (Reading) – The corresponding reading.
level (Level) – The corresponding level.
kwargs – Additional keyword arguments to be passed to the processing function.

Returns:

The output of the given processing function.

Return type:

Any

dispel.processing.data_set.transformation(_func=None, **kwargs)[source]#: Decorate a function as a transformation function.