"""Functionality to read ADS files."""
import warnings
from typing import Any, Dict, Iterable, List, Optional, Union, cast
import pandas as pd
from dispel.data.core import Device, Evaluation, ReadingSchema, Session
from dispel.data.devices import (
AndroidPlatform,
AndroidScreen,
IOSPlatform,
IOSScreen,
Screen,
)
from dispel.data.epochs import EpochDefinition
from dispel.data.levels import Context, Level
from dispel.data.raw import (
RawDataSet,
RawDataSetDefinition,
RawDataSetSource,
RawDataValueDefinition,
)
from dispel.data.values import Value, ValueDefinition
from dispel.io.core import (
convert_data_frame_type,
convert_literal_type,
get_data_type_mapping,
)
from dispel.io.utils import load_json
from dispel.providers.ads.data import ADSModalities, ADSReading
from dispel.providers.generic.tasks.cps.utils import (
EXPECTED_DURATION_D2D,
EXPECTED_DURATION_S2D,
LEVEL_DURATION_DEF,
)
from dispel.providers.registry import register_reader
[docs]
def parse_screen(platform: str, screen_dict: dict) -> Screen:
    """Build the platform specific screen description of a device.

    Parameters
    ----------
    platform
        The device platform, either ``iOS`` or ``Android``.
    screen_dict
        The screen information dictionary. ``widthPixels`` and
        ``heightPixels`` are always read. For supported platforms
        ``densityDpi`` is read as well, plus ``scaleFactor`` on iOS and
        ``xDpi``/``yDpi`` on Android.

    Returns
    -------
    Screen
        An ``IOSScreen`` or an ``AndroidScreen`` depending on ``platform``.

    Raises
    ------
    ValueError
        If given an unsupported platform.
    """
    # Both dimensions are looked up before the platform is inspected, so a
    # missing key is reported ahead of an unsupported platform.
    width = screen_dict["widthPixels"]
    height = screen_dict["heightPixels"]

    if platform == "Android":
        return AndroidScreen(
            width,
            height,
            screen_dict["densityDpi"],
            screen_dict["xDpi"],
            screen_dict["yDpi"],
        )

    if platform == "iOS":
        # On iOS the reported dimensions are kept as ``*_dp_pt`` values and
        # the pixel dimensions are derived by applying the scale factor.
        scale = screen_dict["scaleFactor"]
        return IOSScreen(
            width_pixels=width * scale,
            height_pixels=height * scale,
            density_dpi=screen_dict["densityDpi"],
            width_dp_pt=width,
            height_dp_pt=height,
            scale_factor=scale,
        )

    raise ValueError(
        "Platform only supports the following values: `iOS` and `Android`."
    )
[docs]
def parse_device(device_dict: dict) -> Device:
    """Build a Device from the device section of an ADS record.

    Parameters
    ----------
    device_dict
        The device information dictionary. It must provide ``platform``,
        ``screen``, ``modelCode``, ``versionNumber`` and ``buildNumber``, as
        well as ``osVersion`` (iOS) or ``kernelVersion`` (any other platform).
        ``model`` is optional.

    Returns
    -------
    Device
        The device information.
    """
    platform_name = device_dict["platform"]
    is_ios = platform_name == "iOS"

    # Anything that is not iOS is treated as Android here; unsupported
    # platform names are rejected by ``parse_screen`` right below.
    if is_ios:
        platform = IOSPlatform()
    else:
        platform = AndroidPlatform()

    screen = parse_screen(platform_name, device_dict["screen"])

    # The OS version is stored under a platform dependent key.
    version_key = "osVersion" if is_ios else "kernelVersion"
    os_version = device_dict[version_key]

    return Device(
        None,
        platform,
        device_dict.get("model"),
        device_dict["modelCode"],
        os_version,
        device_dict["versionNumber"],
        device_dict["buildNumber"],
        screen,
    )
[docs]
def parse_session(session_dict: dict) -> Session:
    """Build a Session from the session section of an ADS record.

    Parameters
    ----------
    session_dict
        The session information dictionary. ``startDate``, ``endDate``,
        ``sessionCode`` and ``uuidSession`` are required, ``evaluationCodes``
        is optional.

    Returns
    -------
    Session
        The session related information.
    """

    def _naive_timestamp(key: str) -> pd.Timestamp:
        # ``tz_convert(None)`` yields a timezone-naive timestamp; it requires
        # the parsed date to carry timezone information.
        return pd.Timestamp(session_dict[key]).tz_convert(None)

    session_start = _naive_timestamp("startDate")
    session_end = _naive_timestamp("endDate")
    definition = EpochDefinition(id_=session_dict["sessionCode"])

    return Session(
        start=session_start,
        end=session_end,
        definition=definition,
        uuid=session_dict["uuidSession"],
        evaluation_codes=session_dict.get("evaluationCodes"),
    )
[docs]
def parse_evaluation(
    id_: str,
    evaluation: Dict[str, Union[str, bool, int]],
    user_id: Optional[str] = None,
) -> Evaluation:
    """Build an Evaluation from the evaluation section of an ADS record.

    Parameters
    ----------
    id_
        The evaluation identifier, used as the evaluation ``uuid``.
    evaluation
        The evaluation dictionary information. ``beginTimestamp`` and
        ``endTimestamp`` are interpreted as milliseconds; ``code``,
        ``finished`` and ``exitReason`` are required as well.
    user_id
        The identifier of the user

    Returns
    -------
    Evaluation
        The evaluation related information.
    """

    def _from_millis(key: str) -> pd.Timestamp:
        return pd.Timestamp(evaluation[key], unit="ms")

    began = _from_millis("beginTimestamp")
    ended = _from_millis("endTimestamp")

    # The evaluation code doubles as the identifier of the epoch definition.
    code = cast(str, evaluation["code"])
    definition = EpochDefinition(id_=code)

    return Evaluation(
        start=began,
        end=ended,
        definition=definition,
        uuid=id_,
        finished=cast(bool, evaluation["finished"]),
        exit_reason=cast(str, evaluation["exitReason"]),
        user_id=user_id,
    )
[docs]
def create_ads_value_definitions(
    value_sample: List[dict], raw_data_set_id: str
) -> List[RawDataValueDefinition]:
    """Create one raw data value definition per entry of a measurement.

    Parameters
    ----------
    value_sample
        The ``values`` list of a single ADS sensor measurement; every entry
        provides a ``name`` and a ``unit``.
    raw_data_set_id
        The raw data set id, used as prefix of the definition names.

    Returns
    -------
    List[RawDataValueDefinition]
        The value definitions, in the order of ``value_sample``.
    """

    def _definition_for(entry: dict) -> RawDataValueDefinition:
        # ADS encodes a missing unit as the literal string ``n/a``.
        raw_unit = entry["unit"]
        unit = None if raw_unit == "n/a" else raw_unit
        name = entry["name"]
        return RawDataValueDefinition(
            name,
            ".".join((raw_data_set_id, name)),
            unit=unit,
            data_type=get_data_type_mapping(name),
        )

    return [_definition_for(entry) for entry in value_sample]
[docs]
def create_ads_raw_data_set_definition(
    value_definitions: Iterable[RawDataValueDefinition], raw_data_set_id: str
) -> RawDataSetDefinition:
    """Create the definition of an ADS raw data set.

    Parameters
    ----------
    value_definitions
        An iterable of raw data value definitions
    raw_data_set_id
        The raw data set id

    Returns
    -------
    RawDataSetDefinition
        The definition of the raw data set, with ``ADS`` as its source.
    """
    # Only these data sets are considered measured; every other id is
    # flagged as computed.
    measured_ids = ("accelerometer", "gyroscope", "gps")
    is_computed = raw_data_set_id not in measured_ids

    source = RawDataSetSource("ADS")
    return RawDataSetDefinition(
        raw_data_set_id, source, value_definitions, is_computed
    )
def _frame_measurement_data(measurements: List[dict]) -> pd.DataFrame:
    """Arrange raw ADS measurements into a typed pandas data frame.

    Parameters
    ----------
    measurements
        A non-empty list of measurements. Each measurement holds a ``values``
        list whose entries provide a ``name`` and a ``value``.

    Returns
    -------
    pandas.DataFrame
        One row per measurement, passed through ``convert_data_frame_type``.
    """
    # The column names are taken from the first measurement only.
    header = [entry["name"] for entry in measurements[0]["values"]]

    rows = []
    for measurement in measurements:
        rows.append([entry["value"] for entry in measurement["values"]])

    frame = pd.DataFrame(rows, columns=header)
    return convert_data_frame_type(frame)
[docs]
def get_ads_raw_data_set(level: dict, raw_data_set_id: str) -> pd.DataFrame:
    """Read the data frame of one raw data set of a level.

    Parameters
    ----------
    level
        An evaluation level as in ADS json format, e.g.
        ``data['mobileEvaluationTest']['levels'][level_num]`` with
        ``level_num in {0,1,2,...}``
    raw_data_set_id
        The raw data set id

    Returns
    -------
    pandas.DataFrame
        The raw data set data frame, built from the first sensor whose name
        equals ``raw_data_set_id``.

    Raises
    ------
    ValueError
        If the raw data set id is not found in level data.
    """
    # Sensors are scanned lazily; only the first one with a matching name is
    # used, later sensors of the same name are ignored.
    candidates = (
        sensor for sensor in level["sensors"] if sensor["name"] == raw_data_set_id
    )
    first_match = next(candidates, None)

    if first_match is None:
        raise ValueError(f"Unknown raw_data_set_id {raw_data_set_id}")

    return _frame_measurement_data(first_match["measurements"])
[docs]
def parse_raw_data_set_value_definitions(
    level: dict, raw_data_set_id: str
) -> RawDataSetDefinition:
    """Parse ADS raw data set value definitions.

    The definitions are derived from the first measurement of the first
    sensor named ``raw_data_set_id``. This is the same sensor
    :func:`get_ads_raw_data_set` reads the data from, so the definition and
    the data frame always describe the same sensor, even when a level lists
    several sensors with the same name.

    Parameters
    ----------
    level : dict
        An evaluation level as in ADS json format, e.g.
        ``level = data['mobileEvaluationTest']['levels'][level_id]`` with
        ``level_id`` in ``{0, 1, 2, ...}``
    raw_data_set_id
        The raw data set id

    Returns
    -------
    RawDataSetDefinition
        The definition of the raw data set. It has no value definitions if no
        sensor is named ``raw_data_set_id``.
    """
    value_definitions = []
    # go through sensors (e.g. level['sensors'] = {'user_input', 'screen'})
    for sensor in level["sensors"]:
        if sensor["name"] == raw_data_set_id:
            # The first measurement serves as sample for names and units.
            value_definitions = create_ads_value_definitions(
                sensor["measurements"][0]["values"], raw_data_set_id
            )
            # Stop at the first match; continuing would let a later sensor of
            # the same name override the definitions of the sensor whose
            # data is actually read.
            break
    return create_ads_raw_data_set_definition(value_definitions, raw_data_set_id)
[docs]
def create_ads_raw_data_set(data: dict, raw_data_set_id: str) -> RawDataSet:
    """Create the raw data set of one sensor of a level.

    Parameters
    ----------
    data : dict
        An evaluation level as in ADS json format, e.g.
        ``data['mobileEvaluationTest']['levels'][level_num]`` with
        ``level_num`` in ``{0, 1, 2, ...}``
    raw_data_set_id
        The raw data set id

    Returns
    -------
    RawDataSet
        The raw data set pairing the parsed definition with the typed data.
    """
    # The definition is parsed before the measurements are read.
    set_definition = parse_raw_data_set_value_definitions(data, raw_data_set_id)

    untyped_frame = get_ads_raw_data_set(data, raw_data_set_id)
    typed_frame = convert_data_frame_type(untyped_frame)

    return RawDataSet(set_definition, typed_frame)
[docs]
def get_ads_raw_data_set_ids(data: dict) -> Iterable[str]:
    """Get the ids of the raw data sets that hold measurements.

    Parameters
    ----------
    data
        The sensors of an ADS level; every sensor provides a ``name`` and a
        ``measurements`` collection.

    Returns
    -------
    Generator[str, None, None]
        The names of the sensors with at least one measurement, lazily and in
        sensor order. Names are not de-duplicated.
    """

    def _has_measurements(sensor: dict) -> bool:
        return len(sensor["measurements"]) > 0

    return (sensor["name"] for sensor in filter(_has_measurements, data))
[docs]
def parse_context(data: List) -> Context:
    """Parse the context information available for each level.

    Parameters
    ----------
    data
        The context entries of a level as extracted from the json. Every
        entry provides a ``name``, a ``unit`` and a ``value``.

    Returns
    -------
    Context
        The context representation of the passed ``data``.
    """
    context_values = []
    for entry in data:
        # The entry name serves both as id and as name of the definition.
        name = entry["name"]
        definition = ValueDefinition(name, name, entry["unit"])
        typed_value = convert_literal_type(name, entry["value"])
        context_values.append(Value(definition, typed_value))
    return Context(context_values)
[docs]
def enrich_context(context, evaluation_code, level_modalities):
    """Enrich the context information with test specific information.

    Only the ``cps`` evaluation is enriched: the expected level duration is
    added to ``context``, chosen from the first level modality. Any other
    evaluation code leaves ``context`` untouched.

    Parameters
    ----------
    context
        The level context to enrich in place.
    evaluation_code
        The evaluation code of the level.
    level_modalities
        The modalities of the level; must be a list for ``cps``.

    Raises
    ------
    ValueError
        If the first modality of a ``cps`` level is neither
        ``symbol_to_digit`` nor ``digit_to_digit``.
    """
    if evaluation_code != "cps":
        return

    # CPS
    assert isinstance(level_modalities, list)
    modality = level_modalities[0]

    if modality not in ("symbol_to_digit", "digit_to_digit"):
        raise ValueError(f"unexpected modality {level_modalities}")

    if modality == "symbol_to_digit":
        expected_duration = EXPECTED_DURATION_S2D
    else:
        expected_duration = EXPECTED_DURATION_D2D

    context.set(expected_duration, LEVEL_DURATION_DEF)
[docs]
def parse_level(
    data: Dict[str, Any], evaluation_code: str, ads_modalities: ADSModalities
) -> Level:
    """Parse a specific level.

    Parameters
    ----------
    data
        A dictionary extracted from a json corresponding to a level. It is
        not modified.
    evaluation_code
        The evaluation code, e.g. ``CPS``.
    ads_modalities
        The AdS modalities object.

    Returns
    -------
    Level
        The level representation of ``data``.

    Warns
    -----
    UserWarning
        If a ``mood`` level holds several ``userInput`` sensors with
        measurements. Only the last of them is kept.
    """
    context = parse_context(data["contexts"])
    start = pd.Timestamp(int(data["beginTimestamp"]), unit="ms")
    end = pd.Timestamp(int(data["endTimestamp"]), unit="ms")

    # level_id
    level_modalities = ads_modalities.get_modalities_from_context(
        evaluation_code=evaluation_code, context=context
    )

    sensors = data["sensors"]
    raw_data_set_ids = get_ads_raw_data_set_ids(sensors)

    # Remove duplicate UserInput from Mood
    if evaluation_code == "mood":
        raw_data_set_ids = list(raw_data_set_ids)
        if raw_data_set_ids.count("userInput") > 1:
            warnings.warn(
                "Several answers to the same question have been detected. The "
                "last answer is kept.",
                UserWarning,
            )
            raw_data_set_ids = ["userInput"]
            # Keep the last ``userInput`` sensor that holds measurements
            # rather than blindly the last sensor. The count above guarantees
            # that such a sensor exists.
            last_answer = next(
                sensor
                for sensor in reversed(sensors)
                if sensor["name"] == "userInput" and len(sensor["measurements"]) > 0
            )
            # Work on a shallow copy so the caller's level dictionary keeps
            # all of its sensors.
            data = {**data, "sensors": [last_answer]}

    raw_data_sets = [
        create_ads_raw_data_set(data, raw_data_set_id)
        for raw_data_set_id in raw_data_set_ids
    ]

    enrich_context(context, evaluation_code, level_modalities)

    # fill levels
    return Level(
        id_=level_modalities,
        start=start,
        end=end,
        context=context,
        raw_data_sets=raw_data_sets,
        measure_set=None,
    )
[docs]
def parse_levels(
    data: dict, evaluation_code: str, ads_modalities: ADSModalities
) -> Optional[List[Level]]:
    r"""Extract a list of Level from ``levels_data``.

    Here ``levels_data`` refers to ``data['mobileEvaluationTest']['levels']``.

    Parameters
    ----------
    data
        The levels extracted from a json, one entry per level.
    evaluation_code
        The evaluation code, e.g. ``CPS``
    ads_modalities
        The AdS modalities object.

    Returns
    -------
    Optional[List[Level]]
        A list of :class:`~dispel.data.core.Level`\ s, or ``None`` if
        ``data`` is empty or missing.
    """
    if not data:
        return None

    return [
        parse_level(level_data, evaluation_code, ads_modalities)
        for level_data in data
    ]
[docs]
def parsable_ads_json(value: Any) -> bool:
    """Tell whether a value can be read with :func:`parse_ads_raw_json`.

    A value qualifies if it is a dictionary with a ``mobileEvaluationTest``
    key.
    """
    return isinstance(value, dict) and "mobileEvaluationTest" in value
[docs]
@register_reader(parsable_ads_json, ADSReading)
def parse_ads_raw_json(data: dict) -> ADSReading:
    """Parse data from ADS JSON file.

    Parameters
    ----------
    data
        The ADS raw data. ``uuid``, ``mobileEvaluationTest`` and
        ``mobileDevice`` are required; ``userId``, ``mobileSession`` and
        ``receptionDate`` are optional.

    Returns
    -------
    ADSReading
        The :class:`~dispel.data.ads.ADSReading` representation of the ADS JSON
        raw data.
    """
    evaluation = parse_evaluation(
        data["uuid"], data["mobileEvaluationTest"], data.get("userId")
    )

    # sessions
    raw_session = data.get("mobileSession")
    if raw_session is None:
        # `data['mobileSession']` is `None` in the passive test; fall back to
        # a placeholder session spanning the evaluation.
        session = Session(
            start=evaluation.start,
            end=evaluation.end,
            definition=EpochDefinition(id_="n/a"),
            uuid="n/a",
        )
    else:
        session = parse_session(raw_session)

    schema = ReadingSchema("ADS", "konectom", "1.0")
    device = parse_device(data["mobileDevice"])

    # parse levels; the modalities depend on the app version of the device
    levels = parse_levels(
        data["mobileEvaluationTest"]["levels"],
        str(evaluation.id),
        ADSModalities(device.app_version_number),
    )

    return ADSReading(
        evaluation=evaluation,
        session=session,
        levels=levels,
        schema=schema,
        date=data.get("receptionDate"),
        device=device,
    )
[docs]
def read_ads(path: str) -> ADSReading:
    """Read raw ADS JSON file.

    Parameters
    ----------
    path
        The path to the JSON file containing the data to be read.

    Returns
    -------
    ADSReading
        The reading parsed from the UTF-8 encoded JSON file at ``path``.
    """
    raw_record = load_json(path, "utf-8")
    return parse_ads_raw_json(raw_record)