feat: adding binning feature that handles summary statistics
This commit is contained in:
parent
07457ba0eb
commit
a20f0c2069
@ -65,28 +65,38 @@ class Bin:
|
|||||||
summary_dict = {k: _summarise_values(v) for k, v in self.point_y_dict.items()}
|
summary_dict = {k: _summarise_values(v) for k, v in self.point_y_dict.items()}
|
||||||
return BinSummary(mean_x, summary_dict)
|
return BinSummary(mean_x, summary_dict)
|
||||||
|
|
||||||
def stdev_ys(self) -> typing.Dict[str, float]:
|
|
||||||
return {k: v.std(axis=0, ddof=1).item() for k, v in self.point_y_dict.items()}
|
|
||||||
|
|
||||||
|
|
||||||
def _construct_bins(xs: numpy.ndarray, bin_config: BinConfig) -> numpy.ndarray:
|
def _construct_bins(xs: numpy.ndarray, bin_config: BinConfig) -> numpy.ndarray:
|
||||||
min_x = numpy.min(xs)
|
min_x_raw = numpy.min(xs)
|
||||||
|
|
||||||
# if the bin config requested bin_min is None, then we can ignore it.
|
# if the bin config requested bin_min is None, then we can ignore it.
|
||||||
|
|
||||||
if bin_config.bin_min is not None:
|
if bin_config.bin_min is not None:
|
||||||
_logger.debug(f"Received a desired bin_min={bin_config.bin_min}")
|
_logger.debug(f"Received a desired bin_min={bin_config.bin_min}")
|
||||||
if bin_config.bin_min > min_x:
|
if bin_config.bin_min > min_x_raw:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"The lowest x value of {xs=} was {min_x=}, which is lower than the requested bin_min={bin_config.bin_min}"
|
f"The lowest x value of {xs=} was {min_x_raw=}, which is lower than the requested bin_min={bin_config.bin_min}"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
_logger.debug(f"Setting minimum to {bin_config.bin_min}")
|
_logger.debug(f"Setting minimum to {bin_config.bin_min}")
|
||||||
min_x = bin_config.bin_min
|
min_x_raw = bin_config.bin_min
|
||||||
|
|
||||||
|
max_x_raw = numpy.max(xs)
|
||||||
|
|
||||||
|
if bin_config.log_scale:
|
||||||
|
min_x = numpy.log10(min_x_raw)
|
||||||
|
max_x = numpy.log10(max_x_raw)
|
||||||
|
else:
|
||||||
|
min_x = min_x_raw
|
||||||
|
max_x = max_x_raw
|
||||||
|
|
||||||
max_x = numpy.max(xs)
|
|
||||||
num_points = numpy.ceil(1 + (max_x - min_x) / bin_config.bin_width)
|
num_points = numpy.ceil(1 + (max_x - min_x) / bin_config.bin_width)
|
||||||
return min_x + (numpy.arange(0, num_points) * bin_config.bin_width)
|
bins = min_x + (numpy.arange(0, num_points) * bin_config.bin_width)
|
||||||
|
|
||||||
|
if bin_config.log_scale:
|
||||||
|
return 10**bins
|
||||||
|
else:
|
||||||
|
return bins
|
||||||
|
|
||||||
|
|
||||||
def _populate_bins(
|
def _populate_bins(
|
||||||
|
@ -3,6 +3,11 @@ import tantri.binning.binning as binning
|
|||||||
import numpy
|
import numpy
|
||||||
|
|
||||||
|
|
||||||
|
def test_bin_config_validation():
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
binning.BinConfig(log_scale=False, bin_width=1, min_points_required=1)
|
||||||
|
|
||||||
|
|
||||||
def test_bin_construction_faulty_min():
|
def test_bin_construction_faulty_min():
|
||||||
x_list = numpy.array([5, 6, 7, 8])
|
x_list = numpy.array([5, 6, 7, 8])
|
||||||
|
|
||||||
@ -109,3 +114,126 @@ def test_group_x_bins_summary(snapshot):
|
|||||||
summary = [bin.summary_point() for bin in binned]
|
summary = [bin.summary_point() for bin in binned]
|
||||||
|
|
||||||
assert summary == snapshot
|
assert summary == snapshot
|
||||||
|
|
||||||
|
|
||||||
|
def test_bin_construction_faulty_min_log_scale():
|
||||||
|
x_list = numpy.array([5, 6, 7, 8])
|
||||||
|
|
||||||
|
bin_config = binning.BinConfig(log_scale=True, bin_width=0.8, bin_min=5.5)
|
||||||
|
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
binning._construct_bins(x_list, bin_config)
|
||||||
|
|
||||||
|
|
||||||
|
def test_bin_construction_force_min_log():
|
||||||
|
"""
|
||||||
|
This test shows the main use of the bin_min parameter: if we want our bins to line up nicely with decades, for example,
|
||||||
|
then we can force it by ignoring the provided minimum x.
|
||||||
|
"""
|
||||||
|
x_list = numpy.array([1500, 5000, 10000, 33253, 400000])
|
||||||
|
|
||||||
|
bin_config = binning.BinConfig(log_scale=True, bin_width=1, bin_min=10)
|
||||||
|
|
||||||
|
expected_bins = numpy.array([10, 100, 1000, 10000, 100000, 1000000])
|
||||||
|
|
||||||
|
actual_bins = binning._construct_bins(x_list, bin_config=bin_config)
|
||||||
|
numpy.testing.assert_allclose(
|
||||||
|
actual_bins, expected_bins, err_msg="The bins were not as expected"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_bin_construction_even_log_scale():
|
||||||
|
x_list = numpy.array([1, 2.8, 8, 12.2, 13.6, 17, 19.71, 20, 24, 33])
|
||||||
|
|
||||||
|
# bin width of 0.3 corresponds to 10^0.3 ~= 2, so we're roughly looking at bin edges that double at each step (1, 2, 4, 8, ...)
|
||||||
|
bin_config = binning.BinConfig(log_scale=True, bin_width=0.3)
|
||||||
|
expected_bins = numpy.array(
|
||||||
|
[
|
||||||
|
1.00000000000,
|
||||||
|
1.99526231497,
|
||||||
|
3.98107170553,
|
||||||
|
7.94328234724,
|
||||||
|
15.8489319246,
|
||||||
|
31.6227766017,
|
||||||
|
63.0957344480,
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
actual_bins = binning._construct_bins(x_list, bin_config=bin_config)
|
||||||
|
numpy.testing.assert_allclose(
|
||||||
|
actual_bins, expected_bins, err_msg="The bins were not as expected"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_group_x_bins_log(snapshot):
|
||||||
|
x_list = numpy.array(
|
||||||
|
[
|
||||||
|
0.00158489,
|
||||||
|
0.00363078,
|
||||||
|
0.0398107,
|
||||||
|
0.275423,
|
||||||
|
0.524807,
|
||||||
|
2.51189,
|
||||||
|
8.74984,
|
||||||
|
10.0,
|
||||||
|
63.0957,
|
||||||
|
3981.07,
|
||||||
|
]
|
||||||
|
)
|
||||||
|
y_dict = {
|
||||||
|
"basic_lorentzian": numpy.array(
|
||||||
|
[
|
||||||
|
0.159154,
|
||||||
|
0.15915,
|
||||||
|
0.158535,
|
||||||
|
0.134062,
|
||||||
|
0.0947588,
|
||||||
|
0.00960602,
|
||||||
|
0.000838084,
|
||||||
|
0.000642427,
|
||||||
|
0.0000162008,
|
||||||
|
4.06987e-9,
|
||||||
|
]
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
bin_config = binning.BinConfig(log_scale=True, bin_width=2)
|
||||||
|
# expected bin edges are log-spaced: 10**(-2.8 + 2*k) for k in 0..4, i.e. roughly [0.00158, 0.158, 15.8, 1585, 158489]
|
||||||
|
|
||||||
|
binned = binning.bin_lists(x_list, y_dict, bin_config)
|
||||||
|
|
||||||
|
assert binned == snapshot
|
||||||
|
|
||||||
|
|
||||||
|
def test_group_x_bins_mean_log(snapshot):
|
||||||
|
x_list = numpy.array([0.0158489, 0.0316228, 0.0794328, 0.158489, 0.17378, 0.316228, 0.944061, 0.977237, 0.988553, 3.16228, 5.01187, 15.8489, 25.1189, 31.6228, 158.489, 630.957])
|
||||||
|
y_dict = {
|
||||||
|
"basic_lorentzian": (
|
||||||
|
numpy.array([0.159056, 0.158763, 0.156715, 0.149866, 0.148118, 0.127657, 0.0497503, 0.0474191, 0.0466561, 0.00619907, 0.00252714, 0.000256378, 0.000102165, 0.0000644769, 2.56787e-6, 1.62024e-7])
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
bin_config = binning.BinConfig(log_scale=True, bin_width=1, bin_min=-2)
|
||||||
|
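# NOTE(review): bin_min is compared against the raw x values and then passed through log10 when log_scale=True, so bin_min=-2 looks like it was meant as the exponent (i.e. 10**-2 = 0.01) -- confirm the intended units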
|
||||||
|
|
||||||
|
binned = binning.bin_lists(x_list, y_dict, bin_config)
|
||||||
|
mean_binned = [bin.mean_point() for bin in binned]
|
||||||
|
|
||||||
|
assert mean_binned == snapshot
|
||||||
|
|
||||||
|
|
||||||
|
# def test_group_x_bins_summary(snapshot):
|
||||||
|
# x_list = numpy.array([1, 2.8, 8, 12.2, 13.6, 17, 19.71, 20, 24, 33])
|
||||||
|
# y_dict = {
|
||||||
|
# "identity_plus_one": (
|
||||||
|
# numpy.array([1, 2.8, 8, 12.2, 13.6, 17, 19.71, 20, 24, 33]) + 2
|
||||||
|
# )
|
||||||
|
# }
|
||||||
|
|
||||||
|
# bin_config = binning.BinConfig(log_scale=False, bin_width=8)
|
||||||
|
# # expected_bins = numpy.array([1, 9, 17, 25, 33])
|
||||||
|
|
||||||
|
# binned = binning.bin_lists(x_list, y_dict, bin_config)
|
||||||
|
# summary = [bin.summary_point() for bin in binned]
|
||||||
|
|
||||||
|
# assert summary == snapshot
|
||||||
|
Loading…
x
Reference in New Issue
Block a user