dispel.providers.bdh.tasks.typing.steps module#

Typing module.

class dispel.providers.bdh.tasks.typing.steps.AggregateCorrectingDuration[source]#

Bases: AggregateRawDataSetColumn

An extraction processing step for correcting duration measures.

aggregations: AggregationsDefinitionType = [('mean', 'mean'), ('std', 'standard deviation'), ('median', 'median'), ('min', 'minimum'), ('max', 'maximum')]#
column_id: str = 'duration'#
data_set_ids: str | Iterable[str] = 'correcting_duration_correct_submissions'#

An iterable of data sets to be processed

definition: ValueDefinition | ValueDefinitionPrototype | None = <dispel.data.measures.MeasureValueDefinitionPrototype object>#

The specification of the measure definition

class dispel.providers.bdh.tasks.typing.steps.AggregateIntervalCorrectLetters[source]#

Bases: AggregateRawDataSetColumn

Measures of the interval between correct letters until a mistake.

Parameters:

differentiate – A boolean indicating whether to compute the interval or the derivative of the intervals.

__init__(differentiate=False, **kwargs)[source]#
Parameters:

differentiate (bool) –

class dispel.providers.bdh.tasks.typing.steps.AggregateLettersIntervals[source]#

Bases: AggregateRawDataSetColumn

Extract letter intervals related measures.

Parameters:

category – The category on which one wants to extract measures. If not provided the measures will be extracted based on the data set computed on every word.

__init__(category=None, **kwargs)[source]#
Parameters:

category (WordState | None) –

class dispel.providers.bdh.tasks.typing.steps.AggregateLettersTypedPerWordRatio[source]#

Bases: AggregateRawDataSetColumn

Aggregate measures related to the ratio of letters typed.

aggregations: AggregationsDefinitionType = [('mean', 'mean'), ('std', 'standard deviation'), ('median', 'median'), ('min', 'minimum'), ('max', 'maximum')]#
column_id: str = 'letters_typed_over_length'#
data_set_ids: str | Iterable[str] = 'letters_typed_over_length'#

An iterable of data sets to be processed

definition: ValueDefinition | ValueDefinitionPrototype | None = <dispel.data.measures.MeasureValueDefinitionPrototype object>#

The specification of the measure definition

class dispel.providers.bdh.tasks.typing.steps.AggregateMaxDeviation[source]#

Bases: AggregateRawDataSetColumn

Aggregate step related to the maximum deviation of letter intervals.

aggregations: AggregationsDefinitionType = [('mean', 'mean'), ('std', 'standard deviation'), ('median', 'median'), ('min', 'minimum'), ('max', 'maximum'), (<function iqr>, 'iqr')]#
column_id: str = 'max_deviation_letter_interval'#
data_set_ids: str | Iterable[str] = ['max_deviation_letter_interval']#

An iterable of data sets to be processed

definition: ValueDefinition | ValueDefinitionPrototype | None = <dispel.data.measures.MeasureValueDefinitionPrototype object>#

The specification of the measure definition

class dispel.providers.bdh.tasks.typing.steps.AggregateReactingTime[source]#

Bases: AggregateRawDataSetColumn

An aggregation processing step for reacting time measures.

aggregations: AggregationsDefinitionType = [('mean', 'mean'), ('std', 'standard deviation'), ('median', 'median'), ('min', 'minimum'), ('max', 'maximum')]#
column_id: str = 'reacting_time'#
data_set_ids: str | Iterable[str] = 'reacting_times_correct_submissions'#

An iterable of data sets to be processed

definition: ValueDefinition | ValueDefinitionPrototype | None = <dispel.data.measures.MeasureValueDefinitionPrototype object>#

The specification of the measure definition

class dispel.providers.bdh.tasks.typing.steps.AggregateReactionDuration[source]#

Bases: AggregateRawDataSetColumn

An aggregation step for reaction duration measure.

aggregations: AggregationsDefinitionType = [('mean', 'mean'), ('std', 'standard deviation'), ('median', 'median'), ('min', 'minimum'), ('max', 'maximum')]#
column_id: str = 'duration'#
data_set_ids: str | Iterable[str] = 'reaction_durations_correct_submissions'#

An iterable of data sets to be processed

definition: ValueDefinition | ValueDefinitionPrototype | None = <dispel.data.measures.MeasureValueDefinitionPrototype object>#

The specification of the measure definition

class dispel.providers.bdh.tasks.typing.steps.AggregateReactionTime[source]#

Bases: AggregateRawDataSetColumn

An aggregation step for the reaction time measures.

aggregations: AggregationsDefinitionType = [('mean', 'mean'), ('std', 'standard deviation'), ('median', 'median'), ('min', 'minimum'), ('max', 'maximum')]#
column_id: str = 'reaction_time'#
data_set_ids: str | Iterable[str] = 'reaction_time_per_word'#

An iterable of data sets to be processed

definition: ValueDefinition | ValueDefinitionPrototype | None = <dispel.data.measures.MeasureValueDefinitionPrototype object>#

The specification of the measure definition

class dispel.providers.bdh.tasks.typing.steps.AggregateReactionTimeCorrectLetter[source]#

Bases: AggregateRawDataSetColumn

An aggregation step for reaction time first correct letter measures.

aggregations: AggregationsDefinitionType = [('mean', 'mean'), ('std', 'standard deviation'), ('median', 'median'), ('min', 'minimum'), ('max', 'maximum')]#
column_id: str = 'reaction_time_first_correct'#
data_set_ids: str | Iterable[str] = 'reaction_time_per_word'#

An iterable of data sets to be processed

definition: ValueDefinition | ValueDefinitionPrototype | None = <dispel.data.measures.MeasureValueDefinitionPrototype object>#

The specification of the measure definition

class dispel.providers.bdh.tasks.typing.steps.AggregateSimilarityRatioMeasures[source]#

Bases: AggregateRawDataSetColumn

Aggregate similarity ratio measures.

aggregations: AggregationsDefinitionType = [('mean', 'mean'), ('std', 'standard deviation'), ('median', 'median'), ('min', 'minimum'), ('max', 'maximum')]#
column_id: str = 'similarity_ratio'#
data_set_ids: str | Iterable[str] = 'similarity_ratio_grouped'#

An iterable of data sets to be processed

definition: ValueDefinition | ValueDefinitionPrototype | None = <dispel.data.measures.MeasureValueDefinitionPrototype object>#

The specification of the measure definition

class dispel.providers.bdh.tasks.typing.steps.AggregateTop10IntervalDefaultMeasures[source]#

Bases: AggregateRawDataSetColumn

Extract measures related to the top ten percent letters intervals.

Parameters:

category – The category on which one wants to extract measures. If not provided, the measures will be extracted based on the data set computed on every letter interval.

__init__(category=None, **kwargs)[source]#
Parameters:

category (KeyState | None) –

class dispel.providers.bdh.tasks.typing.steps.AggregateWordDuration[source]#

Bases: AggregateRawDataSetColumn

An aggregation step for Word duration measures.

aggregations: AggregationsDefinitionType = [('mean', 'mean'), ('std', 'standard deviation'), ('median', 'median'), ('min', 'minimum'), ('max', 'maximum')]#
column_id: str = 'word_duration'#
data_set_ids: str | Iterable[str] = 'word_duration'#

An iterable of data sets to be processed

definition: ValueDefinition | ValueDefinitionPrototype | None = <dispel.data.measures.MeasureValueDefinitionPrototype object>#

The specification of the measure definition

class dispel.providers.bdh.tasks.typing.steps.AggregateWordDurationLevelSlope[source]#

Bases: AggregateRawDataSetColumn

An aggregation step for statistics about the slope of word duration.

aggregations: AggregationsDefinitionType = [('mean', 'mean'), ('std', 'standard deviation'), ('median', 'median'), ('min', 'minimum'), ('max', 'maximum')]#
column_id: str = 'difference'#
data_set_ids: str | Iterable[str] = 'word_duration_per_difficulty_mean_difference'#

An iterable of data sets to be processed

definition: ValueDefinition | ValueDefinitionPrototype | None = <dispel.data.measures.MeasureValueDefinitionPrototype object>#

The specification of the measure definition

class dispel.providers.bdh.tasks.typing.steps.BDHTypingSteps[source]#

Bases: ProcessingStepGroup

BDH-specific processing steps for typing.

kwargs: Dict[str, Any] = {'task_name': Typing test (TT)}#
steps: List[ProcessingStep] = [<dispel.providers.bdh.tasks.typing.steps.PreprocessingTypingGroup object>, <dispel.providers.bdh.tasks.typing.steps.ReactionTimeGroup object>, <dispel.providers.bdh.tasks.typing.steps.ExtractPatientScore object>, <dispel.providers.bdh.tasks.typing.steps.WordDurationGroup object>, <dispel.providers.bdh.tasks.typing.steps.TimeToFinishGroup object>, <dispel.providers.bdh.tasks.typing.steps.ReactionCorrectingReactingDurationGroup object>, <dispel.providers.bdh.tasks.typing.steps.CountWordsGroup object>, <dispel.providers.bdh.tasks.typing.steps.CountLettersGroup object>, <dispel.providers.bdh.tasks.typing.steps.TimeBetweenLettersGroup object>, <dispel.providers.bdh.tasks.typing.steps.RatioAndSlopeGroup object>, <dispel.providers.bdh.tasks.typing.steps.FlagAutoCompleteGroup object>, <dispel.providers.bdh.tasks.typing.steps.TypingTremorMeasuresGroup object>]#
class dispel.providers.bdh.tasks.typing.steps.CountLettersGroup[source]#

Bases: ProcessingStepGroup

BDH Typing steps counting number of letters and similarity ratio.

steps: List[ProcessingStep] = [<dispel.providers.bdh.tasks.typing.steps.TransformSimilarityRatio object>, <dispel.providers.bdh.tasks.typing.steps.TransformSimilarityRatioGroup object>, <dispel.providers.bdh.tasks.typing.steps.AggregateSimilarityRatioMeasures object>, <dispel.providers.bdh.tasks.typing.steps.TransformLettersTypedPerWordRatio object>, <dispel.providers.bdh.tasks.typing.steps.AggregateLettersTypedPerWordRatio object>, <dispel.providers.bdh.tasks.typing.steps.ExtractCountCorrectLetters object>]#
class dispel.providers.bdh.tasks.typing.steps.CountWordsGroup[source]#

Bases: ProcessingStepGroup

BDH Typing step counting words typed successfully with(out) errors.

steps: List[ProcessingStep] = [<dispel.providers.bdh.tasks.typing.steps.TransformCorrectSubmissionAndTime object>, <dispel.providers.bdh.tasks.typing.steps.ExtractWordTypedWithOrWoError object>, <dispel.providers.bdh.tasks.typing.steps.ExtractWordTypedWithOrWoError object>, <dispel.providers.bdh.tasks.typing.steps.TransformCorrectSubmissionAndTimeInRow object>, <dispel.providers.bdh.tasks.typing.steps.ExtractWordTypedWithOrWoErrorInRow object>, <dispel.providers.bdh.tasks.typing.steps.ExtractWordTypedWithOrWoErrorInRow object>, <dispel.providers.bdh.tasks.typing.steps.ExtractIncorrectWords object>]#
class dispel.providers.bdh.tasks.typing.steps.ExtractAutocomplete[source]#

Bases: ExtractStep

Extract the number of autocompletions.

data_set_ids: str | Iterable[str] = 'keys_with_autocompletion'#

An iterable of data sets to be processed

definition: ValueDefinition | ValueDefinitionPrototype | None = <dispel.data.measures.MeasureValueDefinitionPrototype object>#

The specification of the measure definition

transform_function()#

Get the total number of autocompletions.

Parameters:

keys_with_autocompletion (pd.DataFrame) – A dataframe indicating whether a key was autocompleted or not.

Returns:

Number of autocompletions performed by the user.

Return type:

int

class dispel.providers.bdh.tasks.typing.steps.ExtractCountCorrectLetters[source]#

Bases: ExtractStep

Extract the number of correct letters.

static count_correct_letters(submission_state)[source]#

Count the number of correct letters.

Parameters:

submission_state (DataFrame) –

Return type:

int

data_set_ids: str | Iterable[str] = 'submission_state'#

An iterable of data sets to be processed

definition: ValueDefinition | ValueDefinitionPrototype | None = <dispel.data.measures.MeasureValueDefinitionPrototype object>#

The specification of the measure definition

class dispel.providers.bdh.tasks.typing.steps.ExtractCountKeyPressed[source]#

Bases: ExtractStep

Count the number of keys pressed.

static count_key_pressed(key_typed)[source]#

Count the number of keys pressed.

Parameters:

key_typed (DataFrame) –

Return type:

int

data_set_ids: str | Iterable[str] = 'key_typed'#

An iterable of data sets to be processed

definition: ValueDefinition | ValueDefinitionPrototype | None = <dispel.data.measures.MeasureValueDefinitionPrototype object>#

The specification of the measure definition

class dispel.providers.bdh.tasks.typing.steps.ExtractIncorrectWords[source]#

Bases: ExtractStep

Extract the number of incorrect words.

data_set_ids: str | Iterable[str] = 'word'#

An iterable of data sets to be processed

definition: ValueDefinition | ValueDefinitionPrototype | None = <dispel.data.measures.MeasureValueDefinitionPrototype object>#

The specification of the measure definition

transform_function()#

Count the number of incorrect words.

Parameters:

word (pd.DataFrame) – A data frame of the words.

Returns:

The number of incorrect words.

Return type:

int

class dispel.providers.bdh.tasks.typing.steps.ExtractPatientScore[source]#

Bases: ExtractStep

Extract user’s typing score.

The typing score is the total number of words correctly typed.

data_set_ids: str | Iterable[str] = 'word'#

An iterable of data sets to be processed

definition: ValueDefinition | ValueDefinitionPrototype | None = <dispel.data.measures.MeasureValueDefinitionPrototype object>#

The specification of the measure definition

transform_function()#

Find total number of words typed.

The total number of words typed successfully is the number of True values in the column success.

Parameters:

word (pd.DataFrame) – A data frame containing the word information with the column success.

Returns:

Number of words successfully typed.

Return type:

int

class dispel.providers.bdh.tasks.typing.steps.ExtractRatioWordsLetters[source]#

Bases: ExtractStep

Compute the ratio of correct words divided by the number of letters.

static compute_ratio(word, key_is_letter)[source]#

Compute ratio of correct words by number of letters typed.

Parameters:
Return type:

float

data_set_ids: str | Iterable[str] = ['word', 'key_is_letter']#

An iterable of data sets to be processed

definition: ValueDefinition | ValueDefinitionPrototype | None = <dispel.data.measures.MeasureValueDefinitionPrototype object>#

The specification of the measure definition

class dispel.providers.bdh.tasks.typing.steps.ExtractReactionTimeSlope[source]#

Bases: ExtractStep

Extract reaction time slope.

Parameters:

is_correct – A boolean value indicating whether the ExtractStep should compute the reaction time slope on any letter or on the first correct letter.

__init__(is_correct, **kwargs)[source]#
Parameters:

is_correct (bool) –

class dispel.providers.bdh.tasks.typing.steps.ExtractTimeToFinishLastThreeWords[source]#

Bases: ExtractStep

Extract aggregated representation of the last three words duration.

__init__(agg, **kwargs)[source]#
Parameters:

agg (str) –

class dispel.providers.bdh.tasks.typing.steps.ExtractTypingSpeedSlope[source]#

Bases: ExtractStep

Extract typing speed slope to quantify the fatigability.

The typing speed slope is computed using the discrete differentiation of the character speed (or typing speed) per word and the differentiation of the appearance timestamps of the words.

data_set_ids: str | Iterable[str] = ['word', 'submission_state']#

An iterable of data sets to be processed

definition: ValueDefinition | ValueDefinitionPrototype | None = <dispel.data.measures.MeasureValueDefinitionPrototype object>#

The specification of the measure definition

transform_function(submission_state)#

Compute the slope of the typing speed.

Parameters:
Return type:

float

class dispel.providers.bdh.tasks.typing.steps.ExtractWordDurationPerDifficulty[source]#

Bases: ExtractMultipleStep

Extract aggregated representations of word duration per difficulty.

__init__(**kwargs)[source]#
class dispel.providers.bdh.tasks.typing.steps.ExtractWordTypedWithOrWoError[source]#

Bases: ExtractStep

Count the words successfully typed and written with(out) error.

__init__(error_free=True, **kwargs)[source]#
Parameters:

error_free (bool) –

class dispel.providers.bdh.tasks.typing.steps.ExtractWordTypedWithOrWoErrorInRow[source]#

Bases: ExtractStep

Count the successfully typed words written with(out) error in a row.

__init__(error_free=True, **kwargs)[source]#
Parameters:

error_free (bool) –

class dispel.providers.bdh.tasks.typing.steps.FlagAutoComplete[source]#

Bases: FlagReadingStep

Flag the reading if any autocomplete key is detected.

flag_autocomplete(reading, **kwargs)[source]#

Indicate if there was no autocompletion during the evaluation.

Parameters:

reading (Reading) –

Return type:

bool

flag_name: AV | str = autocomplete (auto_complete)#

The name of the flag

flag_severity: FlagSeverity | str = 1#
flag_type: FlagType | str = 2#

The type of the flag

reason: str = 'The user has autocompleted at least one word.'#

The detailed reason of the flag

task_name: AV | str = Typing test (TT)#

The task name

class dispel.providers.bdh.tasks.typing.steps.FlagAutoCompleteGroup[source]#

Bases: ProcessingStepGroup

BDH Typing steps to flag autocomplete behavior.

steps: List[ProcessingStep] = [<dispel.providers.bdh.tasks.typing.steps.ExtractAutocomplete object>, <dispel.providers.bdh.tasks.typing.steps.FlagAutoComplete object>]#
class dispel.providers.bdh.tasks.typing.steps.PreprocessingTypingGroup[source]#

Bases: ProcessingStepGroup

BDH typing preprocessing steps.

steps: List[ProcessingStep] = [<dispel.processing.assertions.AssertEvaluationFinished object>, <dispel.providers.bdh.tasks.typing.steps.TransformKeyTyped object>, <dispel.providers.bdh.tasks.typing.steps.TransformKeyInterval object>, <dispel.providers.bdh.tasks.typing.steps.TransformSubmissionState object>, <dispel.providers.bdh.tasks.typing.steps.TransformStateDurations object>, <dispel.providers.bdh.tasks.typing.steps.TransformDetectKeyAutocompletion object>, <dispel.providers.bdh.tasks.typing.steps.TransformDetectWordAutocompletion object>, <dispel.providers.bdh.tasks.typing.steps.TypingPreprocessingIMUGroup object>, <dispel.providers.bdh.tasks.typing.steps.TypingPreprocessingIMUGroup object>]#
class dispel.providers.bdh.tasks.typing.steps.RatioAndSlopeGroup[source]#

Bases: ProcessingStepGroup

BDH Typing steps measuring ratio of words letters and speed slope.

steps: List[ProcessingStep] = [<dispel.providers.bdh.tasks.typing.steps.ExtractRatioWordsLetters object>, <dispel.providers.bdh.tasks.typing.steps.ExtractTypingSpeedSlope object>]#
class dispel.providers.bdh.tasks.typing.steps.ReactionCorrectingReactingDurationGroup[source]#

Bases: ProcessingStepGroup

BDH Typing reaction, correcting and reacting duration processing steps.

steps: List[ProcessingStep] = [<dispel.providers.bdh.tasks.typing.steps.TransformReactionDuration object>, <dispel.providers.bdh.tasks.typing.steps.TransformReactionDurationCorrectSubmissions object>, <dispel.providers.bdh.tasks.typing.steps.AggregateReactionDuration object>, <dispel.providers.bdh.tasks.typing.steps.TransformCorrectingDuration object>, <dispel.providers.bdh.tasks.typing.steps.TransformCorrectingDurationCorrectSubmissions object>, <dispel.providers.bdh.tasks.typing.steps.AggregateCorrectingDuration object>, <dispel.providers.bdh.tasks.typing.steps.TransformReactingTime object>, <dispel.providers.bdh.tasks.typing.steps.TransformReactingTimeCorrectSubmissions object>, <dispel.providers.bdh.tasks.typing.steps.AggregateReactingTime object>]#
class dispel.providers.bdh.tasks.typing.steps.ReactionTimeGroup[source]#

Bases: ProcessingStepGroup

BDH Typing Reaction time processing steps.

steps: List[ProcessingStep] = [<dispel.providers.bdh.tasks.typing.steps.TransformReactionTime object>, <dispel.providers.bdh.tasks.typing.steps.AggregateReactionTime object>, <dispel.providers.bdh.tasks.typing.steps.AggregateReactionTimeCorrectLetter object>, <dispel.providers.bdh.tasks.typing.steps.ExtractReactionTimeSlope object>, <dispel.providers.bdh.tasks.typing.steps.ExtractReactionTimeSlope object>]#
class dispel.providers.bdh.tasks.typing.steps.TimeBetweenLettersGroup[source]#

Bases: ProcessingStepGroup

BDH Typing steps measuring intervals between letters.

steps: List[ProcessingStep] = [<dispel.providers.bdh.tasks.typing.steps.TransformIsLetter object>, <dispel.providers.bdh.tasks.typing.steps.TransformLetterInterval object>, <dispel.providers.bdh.tasks.typing.steps.TransformLetterInterval object>, <dispel.providers.bdh.tasks.typing.steps.TransformLetterInterval object>, <dispel.providers.bdh.tasks.typing.steps.AggregateLettersIntervals object>, <dispel.providers.bdh.tasks.typing.steps.AggregateLettersIntervals object>, <dispel.providers.bdh.tasks.typing.steps.AggregateLettersIntervals object>, <dispel.providers.bdh.tasks.typing.steps.TransformTop10Interval object>, <dispel.providers.bdh.tasks.typing.steps.TransformTop10Interval object>, <dispel.providers.bdh.tasks.typing.steps.AggregateTop10IntervalDefaultMeasures object>, <dispel.providers.bdh.tasks.typing.steps.AggregateTop10IntervalDefaultMeasures object>, <dispel.providers.bdh.tasks.typing.steps.TransformMaxDeviation object>, <dispel.providers.bdh.tasks.typing.steps.AggregateMaxDeviation object>, <dispel.providers.bdh.tasks.typing.steps.TransformIntervalCorrectLettersUntilMistake object>, <dispel.providers.bdh.tasks.typing.steps.AggregateIntervalCorrectLetters object>, <dispel.providers.bdh.tasks.typing.steps.TransformIntervalCorrectLettersUntilMistake object>, <dispel.providers.bdh.tasks.typing.steps.AggregateIntervalCorrectLetters object>]#
class dispel.providers.bdh.tasks.typing.steps.TimeToFinishGroup[source]#

Bases: ProcessingStepGroup

BDH Typing time to finish processing steps.

steps: List[ProcessingStep] = [<dispel.providers.bdh.tasks.typing.steps.ExtractTimeToFinishLastThreeWords object>, <dispel.providers.bdh.tasks.typing.steps.ExtractTimeToFinishLastThreeWords object>, <dispel.providers.bdh.tasks.typing.steps.ExtractTimeToFinishLastThreeWords object>]#
class dispel.providers.bdh.tasks.typing.steps.TransformCorrectSubmissionAndTime[source]#

Bases: TransformStep

A data set to deal with correct (consecutive) words with(out) errors.

This transform step creates a data set suitable for computing measures related to successfully written words with(out) error, and also to streaks of words with(out) error(s).

The transformation, based on the submission_state data set, applies the following modifications:

  • Add the appearance timestamp as a column

  • Create a boolean indicating whether the state is Correct or not

  • Group the data frame by displayed word and aggregate with the min.

This results in a data frame indexed by displayed_word with three columns: appearance_timestamp, is_error_free, and success. Here appearance_timestamp is the timestamp corresponding to the appearance of the word, and is_error_free is a boolean set to True if all correction_state values were Correct. Finally, success indicates if the word was a correct submission.

static correct_and_ts(word, submission_state)[source]#

Group by word on screen and aggregate with the min.

Parameters:
Return type:

DataFrame

data_set_ids: str | Iterable[str] = ['word', 'submission_state']#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: success (success)>, <RawDataValueDefinition: is_error_free (is error free)>, <RawDataValueDefinition: appearance_timestamp (appearance timestamp)>]#
new_data_set_id: str = 'correct_sub_and_time'#
class dispel.providers.bdh.tasks.typing.steps.TransformCorrectSubmissionAndTimeInRow[source]#

Bases: TransformStep

A transform step to count consecutive states.

The transformation, based on the correct_sub_and_time data set, works as follows: assign a subgroup number to each group of consecutive is_error_free values, then group by subgroup number and aggregate with count. Finally, it returns a data frame with three columns: the count, the submission state, and whether the sequence was free of error.

data_set_ids: str | Iterable[str] = 'correct_sub_and_time'#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: success (success)>, <RawDataValueDefinition: count (count consecutive equal states)>, <RawDataValueDefinition: is_error_free (is error free)>]#
new_data_set_id: str = 'correct_sub_in_row'#
transform_function()#

Find consecutive elements and count them.

Parameters:

data (pd.DataFrame) – Data frame used to compute measures related to successfully written words with(out) error, and also to streaks of words with(out) error(s).

Returns:

A data frame with three columns: the count, the submission state and if the sequence was free of error

Return type:

pd.DataFrame

class dispel.providers.bdh.tasks.typing.steps.TransformCorrectingDuration[source]#

Bases: TransformStep

Create a data frame that contains correcting durations.

data_set_ids: str | Iterable[str] = 'state_durations'#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: duration (correcting state duration, s)>, <RawDataValueDefinition: success (success)>]#
new_data_set_id: str = 'correcting_duration'#
transform_function()#

Compute correcting duration.

The Correcting Duration is the time elapsed in a correcting state for a correct submission, in other words, the time spent correcting a mistake.

Parameters:

state_durations (pd.DataFrame) – Data frame containing submission states and duration.

Returns:

A data frame that contains correcting duration.

Return type:

pd.DataFrame

class dispel.providers.bdh.tasks.typing.steps.TransformCorrectingDurationCorrectSubmissions[source]#

Bases: TransformStep

Filter correcting duration values for correct words.

data_set_ids: str | Iterable[str] = 'correcting_duration'#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: duration (correcting state duration for correct words., s)>]#
new_data_set_id: str = 'correcting_duration_correct_submissions'#
transform_function()#

Return correcting duration for correct words.

Parameters:

correcting_duration (pd.DataFrame) – Data frame containing correcting durations.

Returns:

A data frame that contains correcting duration for correct words.

Return type:

pd.DataFrame

class dispel.providers.bdh.tasks.typing.steps.TransformDetectKeyAutocompletion[source]#

Bases: TransformStep

Detect the autocompletion at a key level.

data_set_ids: str | Iterable[str] = 'submission_state'#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: displayed_word (displayed word)>, <RawDataValueDefinition: key (key)>, <RawDataValueDefinition: autocomplete_per_key (autocomplete per key)>]#
new_data_set_id: str = 'keys_with_autocompletion'#
transform_function()#

Detect the autocompletion at the key level.

Create a data frame that contains all the typed keys and their autocompletion status.

Parameters:

submission_state (pd.DataFrame) – A data frame that contains the user state after each keystroke.

Returns:

A new data set with the displayed word, all the keys typed, and autocompletion information.

Return type:

pd.DataFrame

class dispel.providers.bdh.tasks.typing.steps.TransformDetectWordAutocompletion[source]#

Bases: TransformStep

A new dataset indicating if a word has been completed.

If autocompletion was used, the associated word will be marked as True.

data_set_ids: str | Iterable[str] = 'keys_with_autocompletion'#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: autocomplete_per_word (autocomplete per word)>]#
new_data_set_id: str = 'autocompletion_per_word'#
transform_function()#

Detect the autocompletion for all the words.

Parameters:

keys_with_autocompletion (pd.DataFrame) – A data frame that contains all the typed keys and their autocompletion status.

Returns:

The word dataset without the autocompleted words

Return type:

pd.DataFrame

class dispel.providers.bdh.tasks.typing.steps.TransformDifficultyLevel[source]#

Bases: TransformStep

A transform step to compute the difficulty level of each word.

data_set_ids: str | Iterable[str] = 'word'#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: difficulty_level (difficulty level)>]#
static get_difficulty(word)[source]#

Get the level of difficulty.

Parameters:

word (DataFrame) –

Return type:

Series

new_data_set_id: str = 'difficulty_level'#
class dispel.providers.bdh.tasks.typing.steps.TransformIntervalCorrectLettersUntilMistake[source]#

Bases: TransformStep

Compute the interval between correct letters until a mistake.

Parameters:

differentiate – A boolean indicating whether to compute the interval or the derivative of the intervals.

__init__(differentiate=False)[source]#
Parameters:

differentiate (bool) –

class dispel.providers.bdh.tasks.typing.steps.TransformIsLetter[source]#

Bases: TransformStep

A transform step to find if a key is a letter.

data_set_ids: str | Iterable[str] = 'key_typed'#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: key_is_letter (key pressed is letter)>]#
static is_letter(key_typed)[source]#

Identify if a key typed is a letter.

Parameters:

key_typed (DataFrame) –

Return type:

Series

new_data_set_id: str = 'key_is_letter'#
class dispel.providers.bdh.tasks.typing.steps.TransformKeyInterval[source]#

Bases: TransformStep

A transform step to find the time intervals between two key presses.

data_set_ids: str | Iterable[str] = 'key_typed'#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: key_intervals (key intervals, s)>]#
new_data_set_id: str = 'key_intervals'#
transform_function()#
class dispel.providers.bdh.tasks.typing.steps.TransformKeyTyped[source]#

Bases: TransformStep

Remove all keys pressed after the last word disappeared.

data_set_ids: str | Iterable[str] = ['key_typed', 'word']#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: displayed_input (displayed input)>, <RawDataValueDefinition: timestamp_out (timestamp out)>, <RawDataValueDefinition: word_id (word id)>, <RawDataValueDefinition: key (key)>]#
new_data_set_id: str = 'key_typed'#
static remove_outdated_keys(key_typed, word)[source]#

Remove keys released after last word disappearance timestamp.

Parameters:
storage_error: StorageError = 'overwrite'#
class dispel.providers.bdh.tasks.typing.steps.TransformLetterInterval[source]#

Bases: TransformStep

A transform step to compute the time between letters.

The time between letters is computed as the time separating two keys that are letters (see the isalpha() method); for example, the interval between words isn’t considered.

Parameters:

category – The category on which one wants to filter the data set. If not provided, the data set will return the interval between letters for every word. When provided, category should be either Correct or Incorrect.

Raises:

ValueError – If the category provided is not allowed, or if a category is provided but the optional data set word_is_typed is not provided.

__init__(category=None)[source]#
Parameters:

category (WordState | None) –

class dispel.providers.bdh.tasks.typing.steps.TransformLettersTypedPerWordRatio[source]#

Bases: TransformStep

Compute the ratio of the letters typed per word divided by its length.

This transform step computes the ratio for completed words only.

data_set_ids: str | Iterable[str] = ['submission_state', 'word']#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: letters_typed_over_length (letters typed over length)>]#
new_data_set_id: str = 'letters_typed_over_length'#
transform_function(word)#

Group by word and count the letters typed for completed words.

Parameters:
  • submission_state (DataFrame) – A data frame that contains the user state after each keystroke.

  • word (DataFrame) – A data frame listing all the words displayed on screen during the test with the following columns: words, appearance_timestamp and disappearance_timestamp.

Returns:

A data frame with a column letters_typed_over_length containing the following ratio: number of letters typed divided by the length of the word (for completed words).

Return type:

pd.DataFrame

class dispel.providers.bdh.tasks.typing.steps.TransformMaxDeviation[source]#

Bases: TransformStep

A transform step to evaluate the maximum deviation per word.

The deviation is computed as the squared error between the current interval and the mean of these intervals within a word.

static compute_max_letter_interval_deviation(data)[source]#

Compute the maximum letter interval deviation.

Parameters:

data (DataFrame) –

Return type:

DataFrame

data_set_ids: str | Iterable[str] = 'letter_intervals'#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: max_deviation_letter_interval (maximum deviation letter interval, s)>]#
new_data_set_id: str = 'max_deviation_letter_interval'#
class dispel.providers.bdh.tasks.typing.steps.TransformReactingTime[source]#

Bases: TransformStep

Create a data frame that contains reacting time.

data_set_ids: str | Iterable[str] = 'state_durations'#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: t_between_states (reacting time, s)>, <RawDataValueDefinition: success (success)>]#
new_data_set_id: str = 'reacting_times'#
transform_function()#

Return reacting times.

The reacting time is the time elapsed between an incorrect state and a correcting state for a correct submission, in other words, the time to react to a mistake.

Parameters:

state_durations (pd.DataFrame) – Data frame containing submission states and duration.

Returns:

A data frame containing reacting times.

Return type:

pd.DataFrame

class dispel.providers.bdh.tasks.typing.steps.TransformReactingTimeCorrectSubmissions[source]#

Bases: TransformStep

Filter reacting times values for correct words.

data_set_ids: str | Iterable[str] = 'reacting_times'#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: reacting_time (reacting time correct, s)>]#
new_data_set_id: str = 'reacting_times_correct_submissions'#
transform_function()#

Return reacting times for correct words.

Parameters:

reacting_times (pd.DataFrame) – Data frame containing correction differences.

Returns:

A data frame that contains reacting times for correct words.

Return type:

pd.DataFrame

class dispel.providers.bdh.tasks.typing.steps.TransformReactionDuration[source]#

Bases: TransformStep

Create a data frame that contains the reaction duration.

data_set_ids: str | Iterable[str] = 'state_durations'#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: duration (incorrect state duration, s)>, <RawDataValueDefinition: success (success)>]#
new_data_set_id: str = 'reaction_duration'#
transform_function()#

Compute the reaction duration.

The reaction duration is the time elapsed in an incorrect state for a correct submission.

Parameters:

state_durations (pd.DataFrame) – Data frame containing submission states and duration.

Returns:

A data frame that contains reaction duration.

Return type:

pd.DataFrame

class dispel.providers.bdh.tasks.typing.steps.TransformReactionDurationCorrectSubmissions[source]#

Bases: TransformStep

Filter reaction duration values for correct words.

data_set_ids: str | Iterable[str] = 'reaction_duration'#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: duration (reaction duration for correct words, s)>]#
new_data_set_id: str = 'reaction_durations_correct_submissions'#
transform_function()#

Return reaction durations for correct words.

Parameters:

reaction_duration (pd.DataFrame) – Data frame containing reaction durations.

Returns:

A data frame that contains reaction duration for correct words.

Return type:

pd.DataFrame

class dispel.providers.bdh.tasks.typing.steps.TransformReactionTime[source]#

Bases: TransformStep

A transform step to find the reaction time for each word.

data_set_ids: str | Iterable[str] = ['word', 'key_typed']#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: reaction_time (reaction time, s)>, <RawDataValueDefinition: reaction_time_first_correct (reaction time first correct, s)>]#
new_data_set_id: str = 'reaction_time_per_word'#
transform_function(key_typed)#

Compute the reaction time and the first correct letter reaction time.

The reaction time is the time elapsed between the appearance of a word and the time the user typed a letter. The first correct letter reaction time follows the same definition but measures the time elapsed until a correct letter is typed.

Parameters:
  • word (pd.DataFrame) – A data frame listing all the words displayed on screen during the test with the following columns: words, appearance_timestamp and disappearance_timestamp.

  • key_typed (pd.DataFrame) – The data frame with the list of keys pressed by the user; it should contain the columns: timestamp_out and key.

Returns:

A DataFrame with two columns: the reaction time and the reaction time to the first correct letter.

Return type:

pd.DataFrame

class dispel.providers.bdh.tasks.typing.steps.TransformSimilarityRatio[source]#

Bases: TransformStep

A transform step to compute similarity metric between answer and target.

The similarity metric that has been chosen is the ratio returned by the sequence matcher from difflib. It is a float in the range [0, 1]. Where T is the total number of elements in both sequences, and M is the number of matches, this is 2.0*M / T. Note that this is 1.0 if the sequences are identical, and 0.0 if they have nothing in common.

data_set_ids: str | Iterable[str] = 'submission_state'#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: displayed_input (displayed input)>, <RawDataValueDefinition: displayed_word (displayed word)>, <RawDataValueDefinition: similarity_ratio (similarity ratio)>]#
new_data_set_id: str = 'similarity_ratio'#
transform_function()#

Apply similarity ratio.

The similarity ratio is computed between the word being typed displayed_input and the word on screen displayed_word.

Parameters:

submission_state (DataFrame) – A data frame that contains the user state after each keystroke.

Returns:

Similarity ratio between the displayed input and displayed_word.

Return type:

Series

class dispel.providers.bdh.tasks.typing.steps.TransformSimilarityRatioGroup[source]#

Bases: TransformStep

Group similarity ratio by displayed_word and keep the max.

In order to keep only incorrect words, we remove similarity ratios equal to one.

data_set_ids: str | Iterable[str] = 'similarity_ratio'#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: similarity_ratio (similarity ratio)>]#
static group_and_max_sim_ratio(data)[source]#

Group by displayed word and get the similarity ratio max.

Parameters:

data (DataFrame) –

Return type:

DataFrame

new_data_set_id: str = 'similarity_ratio_grouped'#
class dispel.providers.bdh.tasks.typing.steps.TransformStateDurations[source]#

Bases: TransformStep

Create a data frame that contains the duration of each state.

data_set_ids: str | Iterable[str] = ['submission_state', 'key_intervals', 'word']#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: submission_state (submission state)>, <RawDataValueDefinition: duration (Duration of a given state., s)>, <RawDataValueDefinition: t_between_states (Time between states., s)>, <RawDataValueDefinition: success (success)>, <RawDataValueDefinition: displayed_word (displayed word)>]#
new_data_set_id: str = 'state_durations'#
transform_function(key_intervals, word)#

Return submission state duration.

To compute the state duration, we group by displayed word and by submission state. Indeed, we want to compute statistics per word on the time spent in a given state or between states.

Parameters:
  • submission_state (pd.DataFrame) – Data frame containing the submission state each time a user types a key.

  • key_intervals (pd.DataFrame) – Time intervals between samples

  • word (pd.DataFrame) – A data frame listing all the words displayed on screen during the test with the following columns: words, appearance_timestamp and disappearance_timestamp.

Returns:

A data frame that contains duration of each correcting state.

Return type:

pd.DataFrame

class dispel.providers.bdh.tasks.typing.steps.TransformSubmissionState[source]#

Bases: TransformStep

Create a dataframe that contains the submission state.

This transform step translates the state each time a user types a key. Available states are the following: Correct, Incorrect, and Correcting. The state is Correct if the textbox is empty or if it forms a subpart of the word displayed on the screen. The state is Incorrect whenever the user types a key (that is not backspace), leading to a textbox that does not form a subpart of the word displayed on the screen. Finally, the state is identified as Correcting whenever the user corrects their mistake by typing backspace; it can be seen as an intermediate state between an incorrect and a correct state.

data_set_ids: str | Iterable[str] = ['word', 'key_typed']#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: submission_state (submission state)>, <RawDataValueDefinition: displayed_word (displayed word)>, <RawDataValueDefinition: displayed_input (displayed input)>, <RawDataValueDefinition: key (key)>, <RawDataValueDefinition: success (success)>]#
new_data_set_id: str = 'submission_state'#
transform_function(key_typed)#

Compute the submission state.

Create a data frame that contains the state each time a user types a key. Available states are the following: Correct, Incorrect, and Correcting. The state is Correct if the textbox is empty or if it forms a subpart of the word displayed on the screen. The state is Incorrect whenever the user types a key (that is not backspace), leading to a textbox that does not form a subpart of the word displayed on the screen. Finally, the state is identified as Correcting whenever the user corrects their mistake by typing backspace; it can be seen as an intermediate state between an incorrect and a correct state.

Parameters:
  • word (pd.DataFrame) – A data frame listing all the words displayed on screen during the test with the following columns: words, appearance_timestamp and disappearance_timestamp.

  • key_typed (pd.DataFrame) – The data frame with the list of keys pressed by the user; it should contain the columns: timestamp_out and key.

Returns:

A data frame that contains the user state after each keystroke.

Return type:

pd.DataFrame

class dispel.providers.bdh.tasks.typing.steps.TransformTop10Interval[source]#

Bases: TransformStep

A transform step to find the top ten percent letters intervals.

Parameters:

category – The category on which one wants to extract measures. If not provided, the measures will be extracted based on the data set computed on every letter interval.

__init__(category=None)[source]#
Parameters:

category (KeyState | None) –

class dispel.providers.bdh.tasks.typing.steps.TransformWordDuration[source]#

Bases: TransformStep

A transform step to compute the duration of a word.

data_set_ids: str | Iterable[str] = 'word'#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: word_duration (word duration, s)>]#
new_data_set_id: str = 'word_duration'#
transform_function()#

Compute the time spent per word.

Parameters:

word (pd.DataFrame) – A data frame containing the word information with the column success.

Returns:

A data frame containing the time spent per word.

Return type:

pd.Series

class dispel.providers.bdh.tasks.typing.steps.TransformWordDurationLevelDifference[source]#

Bases: TransformStep

A transform step to find the slope of word duration.

The slope is computed as follows: taking level as the x-axis and word_duration-mean as the y-axis, the slope is given by the differentiation of the y-axis values (the x-axis being incremented by one each time).

data_set_ids: str | Iterable[str] = 'word_duration_per_difficulty'#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: difference (difference)>]#
static differentiate(data)[source]#

Differentiate average word duration.

Parameters:

data (DataFrame) –

Return type:

Series

new_data_set_id: str = 'word_duration_per_difficulty_mean_difference'#
class dispel.providers.bdh.tasks.typing.steps.TransformWordDurationPerDifficulty[source]#

Bases: TransformStep

Compute aggregates related to word duration per level difficulty.

data_set_ids: str | Iterable[str] = ['word', 'word_duration', 'difficulty_level']#

An iterable of data sets to be processed

definitions: List[RawDataValueDefinition] = [<RawDataValueDefinition: mean (mean word duration, s)>, <RawDataValueDefinition: median (median word duration, s)>, <RawDataValueDefinition: std (standard deviation of word duration, s)>]#
new_data_set_id: str = 'word_duration_per_difficulty'#
static transform_duration_per_difficulty(word, word_duration, difficulty_level)[source]#

Aggregate word duration per level of difficulty.

Parameters:
Return type:

DataFrame

class dispel.providers.bdh.tasks.typing.steps.TypingPreprocessingIMUGroup[source]#

Bases: ProcessingStepGroup

A preprocessing step to preprocess the typing IMU signal.

Parameters:
  • data_set_id – The data set id on which the transformation is to be performed.

  • resample_freq – Optionally, the frequency to which the data is resampled during the resample step.

  • columns – Optional argument to specify the columns on which the preprocessing steps should be applied.

  • max_frequency_distance – An optional integer specifying the maximum accepted distance between the expected frequency and the estimated frequency above which we raise an error.

__init__(data_set_id, resample_freq=None, columns=None, max_frequency_distance=None, **kwargs)[source]#
Parameters:
  • data_set_id (str) –

  • resample_freq (float | None) –

  • columns (List[str] | None) –

  • max_frequency_distance (int | None) –

class dispel.providers.bdh.tasks.typing.steps.TypingTremorMeasuresGroup[source]#

Bases: ProcessingStepGroup

A group of typing processing steps for tremor measures.

steps: List[ProcessingStep] = [<dispel.providers.generic.tremor.TremorMeasures object>, <dispel.providers.generic.tremor.TremorMeasures object>]#
class dispel.providers.bdh.tasks.typing.steps.WordDurationGroup[source]#

Bases: ProcessingStepGroup

BDH Typing word duration processing steps.

steps: List[ProcessingStep] = [<dispel.providers.bdh.tasks.typing.steps.TransformWordDuration object>, <dispel.providers.bdh.tasks.typing.steps.AggregateWordDuration object>, <dispel.providers.bdh.tasks.typing.steps.TransformDifficultyLevel object>, <dispel.providers.bdh.tasks.typing.steps.TransformWordDurationPerDifficulty object>, <dispel.providers.bdh.tasks.typing.steps.ExtractWordDurationPerDifficulty object>, <dispel.providers.bdh.tasks.typing.steps.TransformWordDurationLevelDifference object>, <dispel.providers.bdh.tasks.typing.steps.AggregateWordDurationLevelSlope object>]#