# Source code for dispel.processing.assertions

"""Assertions to be made on readings as part of processing steps."""
from abc import ABCMeta
from typing import Optional, Sequence

import pandas as pd
from deprecated.sphinx import deprecated

from dispel.data.core import Reading
from dispel.data.levels import Level
from dispel.processing.core import (
    ErrorHandling,
    ProcessingControlResult,
    ProcessingStep,
    ProcessResultType,
    StopProcessingError,
)
from dispel.processing.data_set import DataSetProcessingStepProtocol
from dispel.processing.level import (
    LevelFilterType,
    LevelProcessingControlResult,
    LevelProcessingStep,
)


class AssertEvaluationFinished(ProcessingStep):
    """Processing step that halts processing for unfinished evaluations."""

    def process_reading(self, reading: Reading, **kwargs) -> ProcessResultType:
        """Yield a stop-processing control result if the evaluation is open.

        Parameters
        ----------
        reading
            The reading whose ``evaluation.finished`` flag is inspected.
        kwargs
            Additional keyword arguments; not used by this step.

        Yields
        ------
        ProcessResultType
            A single :class:`ProcessingControlResult` wrapping a
            :class:`StopProcessingError` with ``ErrorHandling.RAISE`` when
            ``reading.evaluation.finished`` is falsy; nothing otherwise.
        """
        # Guard clause: a finished evaluation produces no control result.
        if reading.evaluation.finished:
            return

        not_finished = StopProcessingError("evaluation is not finished", self)
        yield ProcessingControlResult(
            step=self,
            error=not_finished,
            error_handling=ErrorHandling.RAISE,
        )
@deprecated(version="0.0.51", reason="Use assert_level_valid")
class AssertRawDataSetPresent(LevelProcessingStep):
    """Level processing step that requires a given raw data set to exist.

    Parameters
    ----------
    data_set_id
        The id of the raw data set that has to be present in the level.
    level_filter
        Optional level filter forwarded unchanged to
        :class:`LevelProcessingStep`.
    """

    def __init__(
        self, data_set_id: str, level_filter: Optional[LevelFilterType] = None
    ):
        super().__init__(level_filter=level_filter)
        self.data_set_id = data_set_id

    def process_level(
        self, level: Level, reading: Reading, **kwargs
    ) -> ProcessResultType:
        """Yield a stop-processing control result if the data set is absent.

        Parameters
        ----------
        level
            The level queried through ``has_raw_data_set``.
        reading
            The reading being processed; not used by this step.
        kwargs
            Additional keyword arguments; not used by this step.

        Yields
        ------
        ProcessResultType
            A single :class:`LevelProcessingControlResult` wrapping a
            :class:`StopProcessingError` with ``ErrorHandling.RAISE`` when
            the level does not report the raw data set; nothing otherwise.
        """
        # Guard clause: nothing to report when the data set is available.
        if level.has_raw_data_set(self.data_set_id):
            return

        missing = StopProcessingError(
            f"data set {self.data_set_id} is missing", self
        )
        yield LevelProcessingControlResult(
            step=self,
            error=missing,
            level=level,
            error_handling=ErrorHandling.RAISE,
        )
class NotEmptyDataSetAssertionMixin(DataSetProcessingStepProtocol, metaclass=ABCMeta):
    """A mixin to ensure that processed data sets are not empty.

    The check extends ``assert_valid_data_sets`` of the next class in the
    method resolution order and fails on the first empty data frame.
    """

    #: The assertion message
    assertion_message = "Empty dataset {data_set_id} for level {level}"

    #: The handling if a data set is empty
    empty_data_set_handling = ErrorHandling.RAISE

    def assert_valid_data_sets(
        self,
        data_sets: Sequence[pd.DataFrame],
        level: Level,
        reading: Reading,
        **kwargs,
    ):
        """Assert that data sets are not empty.

        Parameters
        ----------
        data_sets
            The data frames to check, paired positionally with the ids
            returned by ``self.get_data_set_ids()``.
        level
            The level the data sets belong to; used in the error message.
        reading
            The reading being processed; forwarded to the parent check.
        kwargs
            Additional keyword arguments forwarded to the parent check.

        Raises
        ------
        AssertionError
            If any data frame is empty. The exception carries one argument:
            the tuple ``(message, self.empty_data_set_handling)``.
        """
        super().assert_valid_data_sets(data_sets, level, reading, **kwargs)

        for data, data_set_id in zip(data_sets, self.get_data_set_ids()):
            if not data.empty:
                continue

            message = self.assertion_message.format(
                data_set_id=data_set_id, level=level
            )
            # Raised explicitly instead of via ``assert`` so the check is not
            # stripped when Python runs with ``-O``. The payload is the same
            # single tuple argument that ``assert cond, (msg, handling)``
            # produces.
            # NOTE(review): presumably the caller catches AssertionError and
            # unpacks this tuple - verify against the processing framework.
            raise AssertionError((message, self.empty_data_set_handling))