"""Functionalities to process pdq39 data."""
from typing import List
import pandas as pd
from dispel.data.measures import MeasureValueDefinitionPrototype
from dispel.data.raw import RawDataValueDefinition
from dispel.data.validators import RangeValidator
from dispel.data.values import AbbreviatedValue as AV
from dispel.data.values import ValueDefinition
from dispel.processing import ProcessingStep
from dispel.processing.data_set import transformation
from dispel.processing.extract import ExtractStep
from dispel.processing.level import ProcessingStepGroup
from dispel.processing.transform import TransformStep
from dispel.providers.registry import process_factory
# Full name and abbreviation of the task; passed to the processing step group
# and to ``process_factory`` further down in this module.
TASK_NAME = AV("Parkinson Disease Questionnaire", "pdq39")

# Maps each domain name to a range of integers; the ranges are contiguous and
# together cover 0 through 38 (presumably zero-based question indices of the
# 39-item questionnaire -- confirm against the step that assigns domains).
# NOTE(review): the key "activities_of_daily_living " carries a trailing
# space. Domain keys are interpolated into measure names by
# ``ExtractDomainScore``, so this space ends up in the measure id. Confirm
# whether downstream consumers rely on it before normalising the key.
DOMAINS = {
    "mobility": range(0, 10),
    "activities_of_daily_living ": range(10, 16),
    "emotional_well_being": range(16, 22),
    "stigma": range(22, 26),
    "social_support": range(26, 29),
    "cognition": range(29, 33),
    "communication": range(33, 36),
    "bodily_discomfort": range(36, 39),
}
r"""The eight functional domains that were evaluated through the survey."""
class GroupScorePerDomain(TransformStep):
    """Aggregate per-answer scores into one row of scores per domain.

    Reads the ``score_per_answer_w_category`` data set and writes the
    ``domain_scores`` data set, indexed by domain.
    """

    data_set_ids = "score_per_answer_w_category"
    new_data_set_id = "domain_scores"

    definitions = [
        RawDataValueDefinition("count", "The number of questions in the domain."),
        RawDataValueDefinition(
            "domain_score",
            "The sum of the score (between 0 and 4) to each individual answer "
            "that belongs to the domain.",
        ),
        RawDataValueDefinition(
            "domain_score_normalized", "The domain score normalized between 0 and 100."
        ),
    ]

    @staticmethod
    @transformation
    def group_score_per_domain(data: pd.DataFrame) -> pd.DataFrame:
        """Compute count, sum and normalized score of answers per domain.

        Parameters
        ----------
        data
            A data frame with at least the columns ``domain`` and ``score``.

        Returns
        -------
        pandas.DataFrame
            A data frame indexed by ``domain`` with the columns ``count``,
            ``domain_score`` and ``domain_score_normalized``.
        """
        scores_by_domain = data.groupby("domain")["score"]

        summary = scores_by_domain.count().to_frame("count")
        summary["domain_score"] = scores_by_domain.sum()

        # Mean score per question scaled by 25.
        mean_score = summary["domain_score"] / summary["count"]
        summary["domain_score_normalized"] = mean_score * 25
        return summary
class ExtractDomainScore(ExtractStep):
    """Extract the normalized PDQ-39 score of a single domain.

    The step reads the ``domain_scores`` data set and exposes the
    ``domain_score_normalized`` value of the configured domain as a measure.

    Parameters
    ----------
    domain
        The domain to extract; used as the row label looked up in the
        ``domain_scores`` data set and interpolated into the measure name.
    """

    data_set_ids = "domain_scores"

    def __init__(self, domain: str, *args, **kwargs):
        # Set the domain before delegating to the parent initializer, as in
        # the original ordering.
        self.domain = domain
        super().__init__(*args, **kwargs)

    def get_definition(self, **kwargs) -> ValueDefinition:
        """Build the measure definition for the configured domain.

        Parameters
        ----------
        kwargs
            Forwarded unchanged to
            ``MeasureValueDefinitionPrototype.create_definition``.

        Returns
        -------
        ValueDefinition
            The definition of the domain score measure, validated to lie
            between 0 and 100.
        """
        # The extracted value is the *normalized* score (see
        # ``read_pdq_domain_score``), so the description spells out the
        # normalization rather than describing only the raw sum.
        description = (
            f"PDQ domain score for {self.domain}. It is computed "
            "as the sum of the scores (ranging between 0 and 4) "
            f"for all the questions of the domain {self.domain}, "
            "divided by the number of questions of the domain and "
            "multiplied by 25 so that it ranges between 0 and 100."
        )
        return MeasureValueDefinitionPrototype(
            measure_name=AV(f"{self.domain} score", f"{self.domain}_score"),
            data_type="float",
            validator=RangeValidator(lower_bound=0, upper_bound=100),
            description=description,
        ).create_definition(**kwargs)

    @transformation
    def read_pdq_domain_score(self, data: pd.DataFrame) -> float:
        """Read the normalized score of the configured domain.

        Parameters
        ----------
        data
            The ``domain_scores`` data frame, indexed by domain.

        Returns
        -------
        float
            The ``domain_score_normalized`` value of the row labelled with
            ``self.domain``.
        """
        return float(data.loc[self.domain, "domain_score_normalized"])
# All PDQ-39 processing steps wrapped in a single group bound to the task
# name: answers are scored, grouped per domain, then one measure is extracted
# per entry of DOMAINS, followed by the total score.
STEPS: List[ProcessingStep] = [
    ProcessingStepGroup(
        [
            TransformAnswerInScore(),
            GroupScorePerDomain(),
            *(ExtractDomainScore(domain=domain_name) for domain_name in DOMAINS),
            ExtractTotalScore(),
        ],
        task_name=TASK_NAME,
    )
]

# Processing entry point created by ``process_factory`` for the
# ``pdq39-activity`` code.
process_pdq_39 = process_factory(
    task_name=TASK_NAME,
    steps=STEPS,
    codes="pdq39-activity",
)