# Source code for dispel.providers.bdh.tasks.pdq39

"""Functionalities to process pdq39 data."""

from typing import List

import pandas as pd

from dispel.data.measures import MeasureValueDefinitionPrototype
from dispel.data.raw import RawDataValueDefinition
from dispel.data.validators import RangeValidator
from dispel.data.values import AbbreviatedValue as AV
from dispel.data.values import ValueDefinition
from dispel.processing import ProcessingStep
from dispel.processing.data_set import transformation
from dispel.processing.extract import ExtractStep
from dispel.processing.level import ProcessingStepGroup
from dispel.processing.transform import TransformStep
from dispel.providers.registry import process_factory

# Abbreviated value identifying the task; passed as ``task_name`` to the
# processing step group and to the process factory at the end of this module.
TASK_NAME = AV("Parkinson Disease Questionnaire", "pdq39")

# Maps each domain name to the zero-based row labels of the questions that
# belong to it. Together the ranges cover labels 0 through 38 without gaps
# (39 questions). The keys are reused verbatim as the value of the "domain"
# column and interpolated into the per-domain measure names and ids.
# NOTE(review): the key "activities_of_daily_living " ends with a trailing
# space, which propagates into the measure id built from it
# (f"{domain}_score"). It looks accidental, but removing it would rename an
# emitted measure -- confirm with downstream consumers before changing.
DOMAINS = {
    "mobility": range(0, 10),
    "activities_of_daily_living ": range(10, 16),
    "emotional_well_being": range(16, 22),
    "stigma": range(22, 26),
    "social_support": range(26, 29),
    "cognition": range(29, 33),
    "communication": range(33, 36),
    "bodily_discomfort": range(36, 39),
}
r"""The eight functional domains that were evaluated through the survey."""


class TransformAnswerInScore(TransformStep):
    """Turn the raw questionnaire answers into per-question scores.

    Reads the ``userInput`` data set and produces the
    ``score_per_answer_w_category`` data set, holding for every question its
    number, the raw answer, the derived score and the domain it belongs to.
    """

    data_set_ids = "userInput"
    new_data_set_id = "score_per_answer_w_category"

    @staticmethod
    @transformation
    def transform_answer_in_score(data: pd.DataFrame) -> pd.DataFrame:
        """Derive a score and a domain label for every answered question.

        Parameters
        ----------
        data
            The user input; its ``displayedValue`` and ``answer`` columns are
            read. Rows are addressed by label, so the frame is expected to be
            labelled with the question positions used in ``DOMAINS``.

        Returns
        -------
        pandas.DataFrame
            A frame with the columns ``question_number``, ``answer``,
            ``score`` and ``domain``.
        """
        scored = pd.DataFrame(
            {
                "question_number": data["displayedValue"],
                "answer": data["answer"],
            }
        )

        # Answers start at 1, scores start at 0.
        scored["score"] = data["answer"] - 1

        # The 28th question (row label 27) offers six possible answers. The
        # first one ("I do not have a spouse or partner") and the second one
        # ("Never") are both treated as the best quality of life, so the
        # score is shifted down by one more and floored at 0.
        partner_row = 27
        shifted_score = scored.loc[partner_row, "score"] - 1
        scored.loc[partner_row, "score"] = max(0, shifted_score)

        # Label every question with the domain owning its row label.
        scored["domain"] = None
        for domain_name, question_rows in DOMAINS.items():
            scored.loc[question_rows, "domain"] = domain_name

        return scored

    definitions = [
        RawDataValueDefinition("question_number", "The question number."),
        RawDataValueDefinition(
            "answer", "The answer to the question with values between 1 and 6."
        ),
        RawDataValueDefinition(
            "score", "The score with values between 0 and 4 (lower the better)."
        ),
        RawDataValueDefinition(
            "domain", "The domain of quality of life being assessed by the question."
        ),
    ]
class GroupScorePerDomain(TransformStep):
    """Aggregate the per-question scores into one row per domain.

    Reads ``score_per_answer_w_category`` and produces ``domain_scores``,
    indexed by domain.
    """

    data_set_ids = "score_per_answer_w_category"
    new_data_set_id = "domain_scores"

    @staticmethod
    @transformation
    def group_score_per_domain(data: pd.DataFrame) -> pd.DataFrame:
        """Compute count, sum and normalized score of each domain.

        Parameters
        ----------
        data
            Per-question scores; the ``domain`` and ``score`` columns are
            read.

        Returns
        -------
        pandas.DataFrame
            A frame indexed by domain with the columns ``count``,
            ``domain_score`` and ``domain_score_normalized``.
        """
        by_domain = data[["domain", "score"]].groupby("domain")

        summary = by_domain.count().rename(columns={"score": "count"})
        summary["domain_score"] = by_domain.sum()

        # The mean item score lies between 0 and 4; multiplying by 25 maps it
        # onto the 0-100 scale.
        mean_item_score = summary["domain_score"] / summary["count"]
        summary["domain_score_normalized"] = mean_item_score * 25

        return summary

    definitions = [
        RawDataValueDefinition("count", "The number of questions in the domain."),
        RawDataValueDefinition(
            "domain_score",
            "The sum of the score (between 0 and 4) to each individual answer "
            "that belongs to the domain.",
        ),
        RawDataValueDefinition(
            "domain_score_normalized", "The domain score normalized between 0 and 100."
        ),
    ]
class ExtractDomainScore(ExtractStep):
    """Extract the normalized PDQ-39 score of a single domain.

    Parameters
    ----------
    domain
        The name of the domain to extract. It is used as the row label looked
        up in the ``domain_scores`` data set and is interpolated into the
        measure name, id and description.
    """

    data_set_ids = "domain_scores"

    def __init__(self, domain: str, *args, **kwargs):
        # Set before delegating so the attribute exists if the parent
        # constructor triggers anything that reads it.
        self.domain = domain
        super().__init__(*args, **kwargs)

    def get_definition(self, **kwargs) -> ValueDefinition:
        """Create the measure value definition for the configured domain.

        Parameters
        ----------
        kwargs
            Passed through to ``create_definition`` of the prototype.

        Returns
        -------
        ValueDefinition
            A float measure named after the domain, checked by a range
            validator with bounds 0 and 100.
        """
        # The extracted value is the normalized domain score, not the raw
        # sum, so the description states the normalization explicitly to
        # agree with the validator bounds below.
        description = (
            f"PDQ domain score for {self.domain}. It is computed "
            "as the sum of the scores (ranging between 0 and 4) "
            f"for all the questions of the domain {self.domain}, "
            "normalized between 0 and 100."
        )
        return MeasureValueDefinitionPrototype(
            measure_name=AV(f"{self.domain} score", f"{self.domain}_score"),
            data_type="float",
            validator=RangeValidator(lower_bound=0, upper_bound=100),
            description=description,
        ).create_definition(**kwargs)

    @transformation
    def read_pdq_domain_score(self, data: pd.DataFrame) -> float:
        """Read the normalized score of the configured domain.

        Parameters
        ----------
        data
            The domain scores, indexed by domain name.

        Returns
        -------
        float
            The ``domain_score_normalized`` value of the row labelled
            ``self.domain``.
        """
        return float(data.loc[self.domain, "domain_score_normalized"])
class ExtractTotalScore(ExtractStep):
    """Extract the PDQ-39 total score from the ``domain_scores`` data set."""

    data_set_ids = "domain_scores"

    # The trailing space after "value" matters: the adjacent string literals
    # are concatenated as-is, and without it the description reads
    # "valuebetween".
    definition = MeasureValueDefinitionPrototype(
        measure_name=AV("Total PDQ-39 score", "total_score"),
        data_type="float",
        validator=RangeValidator(lower_bound=0, upper_bound=100),
        description="The total PDQ-39 score is the mean of the eight domains "
        "scores (one for each domain evaluated) and has a value "
        "between 0 and 100.",
    )

    @staticmethod
    @transformation
    def read_pdq_total_score(data: pd.DataFrame) -> float:
        """Compute the total score as the mean of the normalized domain scores.

        Parameters
        ----------
        data
            The domain scores; the ``domain_score_normalized`` column is read.

        Returns
        -------
        float
            The mean of the ``domain_score_normalized`` column.
        """
        return data.domain_score_normalized.mean()
# Processing pipeline: score every answer, aggregate the scores per domain,
# then extract one measure per domain followed by the total score.
STEPS: List[ProcessingStep] = [
    TransformAnswerInScore(),
    GroupScorePerDomain(),
    *(ExtractDomainScore(domain=domain_name) for domain_name in DOMAINS),
    ExtractTotalScore(),
]

# Wrap the individual steps in a single group carrying the task name.
STEPS = [ProcessingStepGroup(STEPS, task_name=TASK_NAME)]

# Processing entry point created by the provider registry factory.
process_pdq_39 = process_factory(
    task_name=TASK_NAME,
    steps=STEPS,
    codes="pdq39-activity",
)