Source code for pylimer_tools.utils.data_utility

import warnings

import pandas as pd


def get_tail(data, percentage=0.2, min_n=25, max_percentage=0.5):
    """
    Extract the last few entries of a list, DataFrame, or Series.

    The number of returned entries is ideally ``percentage`` of the input
    length, but at least ``min_n`` entries, where that lower bound itself is
    capped at ``max_percentage`` of the input length. The result never has
    more entries than the input. The count is truncated to an integer.

    :param data: The list, DataFrame, or Series to extract the last few
        entries from
    :type data: list or pd.DataFrame or pd.Series
    :param percentage: The fraction of entries to extract (default: 0.2)
    :type percentage: float
    :param min_n: The minimum number of entries to extract (default: 25)
    :type min_n: int
    :param max_percentage: The maximum fraction of entries that ``min_n``
        may force to be extracted (default: 0.5)
    :type max_percentage: float
    :return: A subset of the input data containing the last entries
        according to the specified criteria
    :rtype: Same type as input data
    :raises AssertionError: if ``percentage`` or ``max_percentage``
        exceeds 1
    """
    assert percentage <= 1
    assert max_percentage <= 1

    n_total = len(data)
    tail_n = int(
        min(
            max(min(min_n, max_percentage * n_total), percentage * n_total),
            n_total,
        )
    )

    if isinstance(data, (pd.DataFrame, pd.Series)):
        return data.tail(tail_n)
    if tail_n == 0:
        # `data[-0:]` is `data[0:]`, i.e. the *whole* sequence; return an
        # empty slice instead so that sequences behave like the pandas
        # branch (`.tail(0)` is empty).
        return data[:0]
    return data[-tail_n:]
def unify_data_stepsizes(
    data: pd.DataFrame,
    key: str,
    step_size: int = None,
    max_expected_step_size: int = 100,
) -> pd.DataFrame:
    """
    Get a DataFrame where all data points have the same step
    between the values in the column given by `key`.

    Rows are kept if their value in column `key` is an exact multiple of
    the step size.

    :param data: The DataFrame to unify the step-size for
    :type data: pd.DataFrame
    :param key: The column name indicating the column containing the step-nr
    :type key: str
    :param step_size: The step size to use for filtering
        (if None, computed automatically as the largest difference between
        consecutive sorted values of the `key` column)
    :type step_size: int, optional
    :param max_expected_step_size: Used to get a warning if the computed
        step-size is larger
    :type max_expected_step_size: int, default=100
    :return: A DataFrame with a consistent step-size
    :rtype: pd.DataFrame

    NOTE: this function is rather unstable, as it has a few assumptions:

    - steps are modulo stepsize.
      Breaks e.g. with steps start with 1 and go up by step_size.
    - ideal step-size is max step difference.
      Breaks e.g. if there is one big gap

    If the step size has to be computed but cannot be (fewer than two rows,
    or all step values identical), a copy of the input is returned
    unfiltered instead of dropping every row.
    """
    if step_size is None:
        step_size = data[key].sort_values().diff().max()
        if pd.isna(step_size) or step_size == 0:
            # No positive step can be derived; `% NaN` or `% 0` would
            # compare unequal to 0 everywhere and silently drop all rows.
            return data.copy()
        if step_size > max_expected_step_size:
            warnings.warn(
                "Step size {} unexpectedly large, with max expected {}".format(
                    step_size, max_expected_step_size
                ),
                stacklevel=2,
            )

    on_grid = (data[key] % step_size) == 0
    return data[on_grid]