Two-sided grubbs test KeyError: 355
I am trying to run this two-sided Grubbs test by passing in a pandas.Series object and an appropriate alpha value. Whenever I run the test on the whole dataset, I have no problem. However, when I divide the dataset by a criterion, let's say id, into a dictionary of the form {id: subset-df}, and pass a series from one of the subset dataframes, it gives me a KeyError.
This is my code:
for k, v in sensor_id_to_data.items():
# print(f'FOR SENSOR_ID: {k}')
# print(v['value'])
outliers = grubbs.two_sided_test_outliers(v['value'], alpha=.01)
indices = grubbs.two_sided_test_indices(v['value'], alpha=.01)
print(f'For alpha=.01, {outliers}, {indices}')
outliers = grubbs.two_sided_test_outliers(v['value'], alpha=.025)
indices = grubbs.two_sided_test_indices(v['value'], alpha=.025)
print(f'For alpha=.025, {outliers}, {indices}')
outliers = grubbs.two_sided_test_outliers(v['value'], alpha=.05)
indices = grubbs.two_sided_test_indices(v['value'], alpha=.05)
print(f'For alpha=.05, {outliers}, {indices}')
print('\n')
And this is the output:
KeyError Traceback (most recent call last)
~\Anaconda3\envs\pythonProject\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3360 try:
- 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
~\Anaconda3\envs\pythonProject\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
~\Anaconda3\envs\pythonProject\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 355
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_15964/2069702385.py in <module>
3 # print(v['value'])
4 df = v.copy()
---- 5 outliers = grubbs.two_sided_test_outliers(df['value'], alpha=.01)
6 indices = grubbs.two_sided_test_indices(df['value'], alpha=.01)
7
~\Anaconda3\envs\pythonProject\lib\site-packages\outliers\smirnov_grubbs.py in two_sided_test_outliers(data, alpha)
213
214 def two_sided_test_outliers(data, alpha=DEFAULT_ALPHA):
-- 215 return _two_sided_test(data, alpha, OutputType.OUTLIERS)
216
217
~\Anaconda3\envs\pythonProject\lib\site-packages\outliers\smirnov_grubbs.py in _two_sided_test(data, alpha, output_type)
193
194 def _two_sided_test(data, alpha, output_type):
-- 195 return _test(TwoSidedGrubbsTest, data, alpha, output_type)
196
197
~\Anaconda3\envs\pythonProject\lib\site-packages\outliers\smirnov_grubbs.py in _test(test_class, data, alpha, output_type)
189
190 def _test(test_class, data, alpha, output_type):
-- 191 return test_class(data).run(alpha, output_type=output_type)
192
193
~\Anaconda3\envs\pythonProject\lib\site-packages\outliers\smirnov_grubbs.py in run(self, alpha, output_type)
120
121 while True:
-- 122 outlier_index = self._test_once(data, alpha)
123 if outlier_index is None:
124 break
~\Anaconda3\envs\pythonProject\lib\site-packages\outliers\smirnov_grubbs.py in _test_once(self, data, alpha)
101 :return: the index of the outlier if one if found; None otherwise
102
-- 103 target_index, value = self._target(data)
104
105 g = value / data.std()
~\Anaconda3\envs\pythonProject\lib\site-packages\outliers\smirnov_grubbs.py in _target(self, data)
152 relative_values = abs(data - data.mean())
153 index = relative_values.argmax()
-- 154 value = relative_values[index]
155 return index, value
156
~\Anaconda3\envs\pythonProject\lib\site-packages\pandas\core\series.py in __getitem__(self, key)
940
941 elif key_is_scalar:
-- 942 return self._get_value(key)
943
944 if is_hashable(key):
~\Anaconda3\envs\pythonProject\lib\site-packages\pandas\core\series.py in _get_value(self, label, takeable)
1049
1050 # Similar to Index.get_value, but we do not fall back to positional
- 1051 loc = self.index.get_loc(label)
1052 return self.index._get_values_for_loc(self, loc, label)
1053
~\Anaconda3\envs\pythonProject\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
- 3363 raise KeyError(key) from err
3364
3365 if is_scalar(key) and isna(key) and not self.hasnans:
KeyError: 355
Any ideas on what could be causing this?
Topic hypothesis-testing python-3.x ipython time-series statistics
Category Data Science