feat: adding binning feature that handles summary statistics
This commit is contained in:
parent
07457ba0eb
commit
a20f0c2069
@ -65,28 +65,38 @@ class Bin:
|
||||
summary_dict = {k: _summarise_values(v) for k, v in self.point_y_dict.items()}
|
||||
return BinSummary(mean_x, summary_dict)
|
||||
|
||||
def stdev_ys(self) -> typing.Dict[str, float]:
    """
    Return the sample standard deviation (ddof=1) of every y series held by this bin.

    The result maps each key of self.point_y_dict to a plain Python float.
    """
    spread_by_name: typing.Dict[str, float] = {}
    for name, values in self.point_y_dict.items():
        # .item() unwraps the numpy scalar so callers get a builtin float
        spread_by_name[name] = values.std(axis=0, ddof=1).item()
    return spread_by_name
|
||||
|
||||
|
||||
def _construct_bins(xs: numpy.ndarray, bin_config: BinConfig) -> numpy.ndarray:
    """
    Build the array of evenly spaced bin edges covering xs.

    The first edge is bin_config.bin_min when one is given, otherwise the smallest value in xs.
    Edges are spaced bin_config.bin_width apart, and enough of them are generated that the
    last edge is at or above the largest value in xs.

    When bin_config.log_scale is set, the spacing is applied to log10 of the values (so
    bin_width is measured in decades) and the edges are converted back with 10**edge before
    being returned. bin_min is compared against, and replaces, the raw (un-logged) minimum.

    Raises ValueError if bin_min is larger than the smallest x, or if log_scale is set and
    the lowest edge is not strictly positive.
    """
    min_x_raw = numpy.min(xs)

    # if the bin config requested bin_min is None, then we can ignore it.
    if bin_config.bin_min is not None:
        _logger.debug(f"Received a desired bin_min={bin_config.bin_min}")
        if bin_config.bin_min > min_x_raw:
            raise ValueError(
                f"The lowest x value of {xs=} was {min_x_raw=}, which is lower than the requested bin_min={bin_config.bin_min}"
            )
        else:
            _logger.debug(f"Setting minimum to {bin_config.bin_min}")
            min_x_raw = bin_config.bin_min

    max_x_raw = numpy.max(xs)

    if bin_config.log_scale:
        # log10 of a non-positive value is nan/-inf, which would only surface later as an
        # opaque numpy.arange failure; fail here with a message that names the cause.
        if min_x_raw <= 0:
            raise ValueError(
                f"Cannot build log-scale bins starting at {min_x_raw=}; the lowest bin edge must be strictly positive"
            )
        min_x = numpy.log10(min_x_raw)
        max_x = numpy.log10(max_x_raw)
    else:
        min_x = min_x_raw
        max_x = max_x_raw

    # the extra 1 guarantees an edge at or beyond max_x, so the largest point lands in a bin
    num_points = numpy.ceil(1 + (max_x - min_x) / bin_config.bin_width)
    bins = min_x + (numpy.arange(0, num_points) * bin_config.bin_width)

    if bin_config.log_scale:
        # edges were laid out in log10 space; hand them back in raw units
        return 10**bins
    return bins
|
||||
|
||||
|
||||
def _populate_bins(
|
||||
|
@ -3,6 +3,11 @@ import tantri.binning.binning as binning
|
||||
import numpy
|
||||
|
||||
|
||||
def test_bin_config_validation():
    """Constructing a BinConfig with min_points_required=1 must raise ValueError."""
    rejected_settings = dict(log_scale=False, bin_width=1, min_points_required=1)

    with pytest.raises(ValueError):
        binning.BinConfig(**rejected_settings)
|
||||
|
||||
|
||||
def test_bin_construction_faulty_min():
|
||||
x_list = numpy.array([5, 6, 7, 8])
|
||||
|
||||
@ -109,3 +114,126 @@ def test_group_x_bins_summary(snapshot):
|
||||
summary = [bin.summary_point() for bin in binned]
|
||||
|
||||
assert summary == snapshot
|
||||
|
||||
|
||||
def test_bin_construction_faulty_min_log_scale():
    """A bin_min above the smallest x must be rejected when log_scale is on as well."""
    # 5.5 is above the lowest data point (5), so bin construction has to refuse it
    config = binning.BinConfig(log_scale=True, bin_width=0.8, bin_min=5.5)
    data = numpy.array([5, 6, 7, 8])

    with pytest.raises(ValueError):
        binning._construct_bins(data, config)
|
||||
|
||||
|
||||
def test_bin_construction_force_min_log():
    """
    This test shows the main use of the bin_min parameter: if we want our bins to line up
    nicely with decades, for example, we can force that by overriding the minimum x found
    in the data.
    """
    decade_edges = numpy.array([10, 100, 1000, 10000, 100000, 1000000])
    data = numpy.array([1500, 5000, 10000, 33253, 400000])

    # the data starts at 1500, but bin_min=10 pulls the first edge down to a clean decade
    config = binning.BinConfig(log_scale=True, bin_width=1, bin_min=10)
    result = binning._construct_bins(data, bin_config=config)

    numpy.testing.assert_allclose(
        result, decade_edges, err_msg="The bins were not as expected"
    )
|
||||
|
||||
|
||||
def test_bin_construction_even_log_scale():
    """Log-scale bins with no bin_min start at the smallest x and grow by a fixed factor."""
    # bin width of 0.3 corresponds to 10^0.3 ~= 2, so each bin edge is roughly
    # double the previous one
    doubling_edges = numpy.array(
        [
            1.00000000000,
            1.99526231497,
            3.98107170553,
            7.94328234724,
            15.8489319246,
            31.6227766017,
            63.0957344480,
        ]
    )
    data = numpy.array([1, 2.8, 8, 12.2, 13.6, 17, 19.71, 20, 24, 33])
    config = binning.BinConfig(log_scale=True, bin_width=0.3)

    result = binning._construct_bins(data, bin_config=config)

    numpy.testing.assert_allclose(
        result, doubling_edges, err_msg="The bins were not as expected"
    )
|
||||
|
||||
|
||||
def test_group_x_bins_log(snapshot):
    """Snapshot the result of bin_lists for a single y series binned on a log scale."""
    frequencies = numpy.array(
        [0.00158489, 0.00363078, 0.0398107, 0.275423, 0.524807, 2.51189, 8.74984, 10.0, 63.0957, 3981.07]
    )
    lorentzian_values = numpy.array(
        [0.159154, 0.15915, 0.158535, 0.134062, 0.0947588, 0.00960602, 0.000838084, 0.000642427, 0.0000162008, 4.06987e-9]
    )
    y_dict = {"basic_lorentzian": lorentzian_values}

    # with log_scale on, bin_width is measured in log10 units, so each bin spans two decades
    config = binning.BinConfig(log_scale=True, bin_width=2)

    binned = binning.bin_lists(frequencies, y_dict, config)

    assert binned == snapshot
|
||||
|
||||
|
||||
def test_group_x_bins_mean_log(snapshot):
    """Snapshot the per-bin mean points produced from log-scale binning with a forced bin_min."""
    frequencies = numpy.array(
        [
            0.0158489,
            0.0316228,
            0.0794328,
            0.158489,
            0.17378,
            0.316228,
            0.944061,
            0.977237,
            0.988553,
            3.16228,
            5.01187,
            15.8489,
            25.1189,
            31.6228,
            158.489,
            630.957,
        ]
    )
    lorentzian_values = numpy.array(
        [
            0.159056,
            0.158763,
            0.156715,
            0.149866,
            0.148118,
            0.127657,
            0.0497503,
            0.0474191,
            0.0466561,
            0.00619907,
            0.00252714,
            0.000256378,
            0.000102165,
            0.0000644769,
            2.56787e-6,
            1.62024e-7,
        ]
    )
    y_dict = {"basic_lorentzian": lorentzian_values}

    # NOTE(review): bin_min=-2 reads like a log10 exponent (i.e. 0.01), but _construct_bins
    # compares bin_min against the raw x values and then takes log10 of it — confirm
    # whether this should be bin_min=0.01 before trusting the snapshot.
    config = binning.BinConfig(log_scale=True, bin_width=1, bin_min=-2)

    binned = binning.bin_lists(frequencies, y_dict, config)
    mean_binned = [group.mean_point() for group in binned]

    assert mean_binned == snapshot
|
||||
|
||||
|
||||
# def test_group_x_bins_summary(snapshot):
|
||||
# x_list = numpy.array([1, 2.8, 8, 12.2, 13.6, 17, 19.71, 20, 24, 33])
|
||||
# y_dict = {
|
||||
# "identity_plus_one": (
|
||||
# numpy.array([1, 2.8, 8, 12.2, 13.6, 17, 19.71, 20, 24, 33]) + 2
|
||||
# )
|
||||
# }
|
||||
|
||||
# bin_config = binning.BinConfig(log_scale=False, bin_width=8)
|
||||
# # expected_bins = numpy.array([1, 9, 17, 25, 33])
|
||||
|
||||
# binned = binning.bin_lists(x_list, y_dict, bin_config)
|
||||
# summary = [bin.summary_point() for bin in binned]
|
||||
|
||||
# assert summary == snapshot
|
||||
|
Loading…
x
Reference in New Issue
Block a user