Two-sided Grubbs test KeyError: 355

I am trying to run this two-sided Grubbs test by passing in a pandas.Series object and an appropriate alpha value. Whenever I run the test on the whole dataset, I have no problem. However, when I divide the dataset by a criterion, let's say id, into a dictionary of the form id: subset-df, and pass a series from a subset dataframe, it gives me a KeyError.

This is my code:

for k, v in sensor_id_to_data.items():
#     print(f'FOR SENSOR_ID: {k}')
#     print(v['value'])
    
    outliers = grubbs.two_sided_test_outliers(v['value'], alpha=.01)
    indices = grubbs.two_sided_test_indices(v['value'], alpha=.01)
    
    print(f'For alpha=.01, {outliers}, {indices}')
    
    outliers = grubbs.two_sided_test_outliers(v['value'], alpha=.025)
    indices = grubbs.two_sided_test_indices(v['value'], alpha=.025)
    
    print(f'For alpha=.025, {outliers}, {indices}')
    
    outliers = grubbs.two_sided_test_outliers(v['value'], alpha=.05)
    indices = grubbs.two_sided_test_indices(v['value'], alpha=.05)
    
    print(f'For alpha=.05, {outliers}, {indices}')
    
    print('\n')

And this is the output:

KeyError                                  Traceback (most recent call last)
~\Anaconda3\envs\pythonProject\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   3360             try:
-> 3361                 return self._engine.get_loc(casted_key)
   3362             except KeyError as err:

~\Anaconda3\envs\pythonProject\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

~\Anaconda3\envs\pythonProject\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()

KeyError: 355

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_15964/2069702385.py in <module>
      3 #     print(v['value'])
      4     df = v.copy()
----> 5     outliers = grubbs.two_sided_test_outliers(df['value'], alpha=.01)
      6     indices = grubbs.two_sided_test_indices(df['value'], alpha=.01)
      7 

~\Anaconda3\envs\pythonProject\lib\site-packages\outliers\smirnov_grubbs.py in two_sided_test_outliers(data, alpha)
    213 
    214 def two_sided_test_outliers(data, alpha=DEFAULT_ALPHA):
--> 215     return _two_sided_test(data, alpha, OutputType.OUTLIERS)
    216 
    217 

~\Anaconda3\envs\pythonProject\lib\site-packages\outliers\smirnov_grubbs.py in _two_sided_test(data, alpha, output_type)
    193 
    194 def _two_sided_test(data, alpha, output_type):
--> 195     return _test(TwoSidedGrubbsTest, data, alpha, output_type)
    196 
    197 

~\Anaconda3\envs\pythonProject\lib\site-packages\outliers\smirnov_grubbs.py in _test(test_class, data, alpha, output_type)
    189 
    190 def _test(test_class, data, alpha, output_type):
--> 191     return test_class(data).run(alpha, output_type=output_type)
    192 
    193 

~\Anaconda3\envs\pythonProject\lib\site-packages\outliers\smirnov_grubbs.py in run(self, alpha, output_type)
    120 
    121         while True:
--> 122             outlier_index = self._test_once(data, alpha)
    123             if outlier_index is None:
    124                 break

~\Anaconda3\envs\pythonProject\lib\site-packages\outliers\smirnov_grubbs.py in _test_once(self, data, alpha)
    101         :return: the index of the outlier if one if found; None otherwise
    102         
--> 103         target_index, value = self._target(data)
    104 
    105         g = value / data.std()

~\Anaconda3\envs\pythonProject\lib\site-packages\outliers\smirnov_grubbs.py in _target(self, data)
    152         relative_values = abs(data - data.mean())
    153         index = relative_values.argmax()
--> 154         value = relative_values[index]
    155         return index, value
    156 

~\Anaconda3\envs\pythonProject\lib\site-packages\pandas\core\series.py in __getitem__(self, key)
    940 
    941         elif key_is_scalar:
--> 942             return self._get_value(key)
    943 
    944         if is_hashable(key):

~\Anaconda3\envs\pythonProject\lib\site-packages\pandas\core\series.py in _get_value(self, label, takeable)
   1049 
   1050         # Similar to Index.get_value, but we do not fall back to positional
-> 1051         loc = self.index.get_loc(label)
   1052         return self.index._get_values_for_loc(self, loc, label)
   1053 

~\Anaconda3\envs\pythonProject\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   3361                 return self._engine.get_loc(casted_key)
   3362             except KeyError as err:
-> 3363                 raise KeyError(key) from err
   3364 
   3365         if is_scalar(key) and isna(key) and not self.hasnans:

KeyError: 355

Any ideas on what could be causing this?

Topic hypothesis-testing python-3.x ipython time-series statistics

Category Data Science

About

Geeks Mental is a community that publishes articles and tutorials about Web, Android, Data Science, new techniques and Linux security.