Source code for dispel.signal.dtw

"""A module dedicated to the Dynamic Time Warping."""
from typing import Tuple

import numpy as np
import pandas as pd
from fastdtw import fastdtw

from dispel.signal.core import euclidean_distance


[docs] def get_minimal_matches( actual: pd.DataFrame, expected: pd.DataFrame, distance: int = 2 ) -> Tuple[float, pd.DataFrame]: """Compute minimal euclidean distance between actual and expected paths. Based on the Dynamic Time Warping algorithm (`fastdtw` library), compute all the detected attributions between an actual and an expected trajectory and then extract the minimum ones. Parameters ---------- actual The actual trajectory (x and y coordinates). expected The expected trajectory (x and y coordinates). distance The chosen metric to compute DTW, 2 being by default and means euclidean distance. Returns ------- Tuple[float, pandas.DataFrame] A coupling measure and a pandas data frame with minimal attributions indexes for actual and expected trajectories and the euclidean distance between each attribution. """ path_model = expected[["x", "y"]].to_numpy() # Detect if up-sampling has been previously done on data. path_user = actual[["x", "y"]].to_numpy() # Compute the fast Dynamic Time Warping similarity measures. coupling_measure, matches = fastdtw(path_user, path_model, dist=distance) # Store matched attributions into a pandas data frame. matches = pd.DataFrame(matches).rename(columns={0: "actual", 1: "expect"}) # Compute the distance between each attributed points. matches["min_distance"] = matches.apply( lambda row: euclidean_distance( (actual["x"][row["actual"]], actual["y"][row["actual"]]), (expected["x"][row["expect"]], expected["y"][row["expect"]]), ), axis=1, ) # Keep only one attribution between a user point and a model point based on keeping # the minimum distance. min_matches = matches.groupby(by="actual").min().reset_index() return coupling_measure, min_matches
[docs] def get_dtw_distance(user: pd.DataFrame, reference: pd.DataFrame) -> pd.Series: """Extract information about DTW metrics. This implementation is using Dynamic Time Warping (`fastdtw` library) to compute the similarity measures between the user and the model paths. This algorithm returns the coupling measure value (which is the value of the similarity metric) and the attributions between a model point and the user points close enough to be considered as a potential similar point (i.e. ``attributions == [(m0, u0),(m1, u1),(m1, u2), (m1, u3), (m2, u1), (m2, u2), (m2, u2), (m2, u3), (m2,u4),...]`` with mi the ith model point index and uj the jth user point index). Then, we isolate the minimum distance between each model point and attributed user points. Those measures are in fact equivalent to those obtained with the variant Fréchet similarity measure algorithm with a back propagation. This implementation is around 30 times faster than the mentioned Fréchet algorithm. Parameters ---------- user A pandas data frame composed of the user paths or his up sampled ones. reference The reference trajectory corresponding to the current level. Returns ------- pandas.Series A pandas data frame which contains the DTW coupling measure, the mean and median minimum euclidean distance between the closest attributions, the standard deviation of minimum euclidean distance between the closest attributions and the sum of all the minimum euclidean distance between the closest attributions. """ coupling_measure, min_matches = get_minimal_matches(user, reference) # Create the pandas data frame with all measures. dtw_dict = { "dtw_coupling_measure": coupling_measure, "dtw_mean_distance": np.mean(min_matches["min_distance"]), "dtw_median_distance": np.median(min_matches["min_distance"]), "dtw_std_distance": np.std(min_matches["min_distance"]), "dtw_total_distance": np.sum(min_matches["min_distance"]), } return pd.Series(dtw_dict, name="dtw_data")