feat: add subset simulation implementation
deepdog/subset_simulation/subset_simulation_impl.py (new file, +310 lines)
@@ -0,0 +1,310 @@
import logging
import numpy
import pdme.measurement
import pdme.measurement.input_types
import pdme.subspace_simulation
from typing import Sequence, Tuple, Optional

from dataclasses import dataclass

_logger = logging.getLogger(__name__)


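# Result container for a subset simulation run. probs_list holds
# (probability, cost, level) tuples for the samples seen along the way; the
# over_/under_ fields bracket the target cost when a run stops early at a
# target, and are left as None otherwise.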
@dataclass
class SubsetSimulationResult:
    probs_list: Sequence[Tuple]
    over_target_cost: Optional[float]
    over_target_likelihood: Optional[float]
    under_target_cost: Optional[float]
    under_target_likelihood: Optional[float]


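# Subset simulation driver for dipole models. Level 0 draws n_c * n_s samples
# directly from the model; every later level seeds n_c MCMC chains from the
# n_c lowest-cost samples of the previous level and runs each chain for n_s
# steps against a tightening cost threshold, so samples surviving level i
# carry probability weight (1 / n_s)^i.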
class SubsetSimulation:
    def __init__(
        self,
        model_name_pair,
        dot_inputs,
        actual_measurements: Sequence[pdme.measurement.DotMeasurement],
        n_c: int,
        n_s: int,
        m_max: int,
        target_cost: Optional[float] = None,
        level_0_seed: int = 200,
        mcmc_seed: int = 20,
        use_adaptive_steps=True,
        default_phi_step=0.01,
        default_theta_step=0.01,
        default_r_step=0.01,
        default_w_log_step=0.01,
        default_upper_w_log_step=4,
    ):
        name, model = model_name_pair
        self.model_name = name
        self.model = model
        _logger.info(f"got model {self.model_name}")

        self.dot_inputs_array = pdme.measurement.input_types.dot_inputs_to_array(
            dot_inputs
        )
        # _logger.debug(f"actual measurements: {actual_measurements}")
        self.actual_measurement_array = numpy.array([m.v for m in actual_measurements])

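        # Batch cost of candidate dipole configurations against the actual
        # measurements; used throughout as a lower-is-better figure of merit.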
        def cost_function_to_use(dipoles_to_test):
            return pdme.subspace_simulation.proportional_costs_vs_actual_measurement(
                self.dot_inputs_array, self.actual_measurement_array, dipoles_to_test
            )

        self.cost_function_to_use = cost_function_to_use

        self.n_c = n_c
        self.n_s = n_s
        self.m_max = m_max

        self.level_0_seed = level_0_seed
        self.mcmc_seed = mcmc_seed

        self.use_adaptive_steps = use_adaptive_steps
        self.default_phi_step = default_phi_step
        self.default_theta_step = default_theta_step
        self.default_r_step = default_r_step
        self.default_w_log_step = default_w_log_step
        self.default_upper_w_log_step = default_upper_w_log_step

        _logger.info("using params:")
        _logger.info(f"\tn_c: {self.n_c}")
        _logger.info(f"\tn_s: {self.n_s}")
        _logger.info(f"\tm_max: {self.m_max}")
        _logger.info("let's do level 0...")

        self.target_cost = target_cost
        _logger.info(f"will stop at target cost {target_cost}")

    def execute(self) -> SubsetSimulationResult:

        probs_list = []

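        # Level 0: plain Monte Carlo sampling of n_c * n_s dipole configurations
        # from the model, with its own seeded RNG for reproducibility.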
        sample_dipoles = self.model.get_monte_carlo_dipole_inputs(
            self.n_c * self.n_s,
            -1,
            rng_to_use=numpy.random.default_rng(self.level_0_seed),
        )
        # _logger.debug(sample_dipoles)
        # _logger.debug(sample_dipoles.shape)
        costs = self.cost_function_to_use(sample_dipoles)

        _logger.debug(f"costs: {costs}")
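        # Sort by cost in descending order so the n_c best (lowest-cost) samples
        # end up at the tail; the level threshold is the n_c-th lowest cost.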
        sorted_indexes = costs.argsort()[::-1]

        _logger.debug(costs[sorted_indexes])
        _logger.debug(sample_dipoles[sorted_indexes])

        sorted_costs = costs[sorted_indexes]
        sorted_dipoles = sample_dipoles[sorted_indexes]

        threshold_cost = sorted_costs[-self.n_c]

        all_dipoles = numpy.array(
            [
                pdme.subspace_simulation.sort_array_of_dipoles_by_frequency(samp)
                for samp in sorted_dipoles
            ]
        )
        all_chains = list(zip(sorted_costs, all_dipoles))

        mcmc_rng = numpy.random.default_rng(self.mcmc_seed)

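        # Main loop: one iteration per conditional level, up to m_max levels.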
        for i in range(self.m_max):
            next_seeds = all_chains[-self.n_c :]

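            # Record probability estimates for the samples not carried forward as
            # seeds: the sample ranked cost_index at level i gets weight
            # ((n_c * n_s - cost_index) / (n_c * n_s)) * (1 / n_s)^i.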
            for cost_index, cost_chain in enumerate(all_chains[: -self.n_c]):
                probs_list.append(
                    (
                        ((self.n_c * self.n_s - cost_index) / (self.n_c * self.n_s))
                        / (self.n_s ** (i)),
                        cost_chain[0],
                        i + 1,
                    )
                )

            next_seeds_as_array = numpy.array([s for _, s in next_seeds])

            stdevs = self.get_stdevs_from_arrays(next_seeds_as_array)
            _logger.info(f"got stdevs: {stdevs.stdevs}")

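            # Rebuild the population: each of the n_c seeds spawns an MCMC chain of
            # n_s states via the model's get_mcmc_chain, conditioned on the current
            # threshold cost.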
            all_chains = []
            for c, s in next_seeds:
                # chain = mcmc(s, threshold_cost, n_s, model, dot_inputs_array, actual_measurement_array, mcmc_rng, curr_cost=c, stdevs=stdevs)
                # until a new version makes that possible, go through the model:
                chain = self.model.get_mcmc_chain(
                    s,
                    self.cost_function_to_use,
                    self.n_s,
                    threshold_cost,
                    stdevs,
                    initial_cost=c,
                    rng_arg=mcmc_rng,
                )
                for cost, chained in chain:
                    try:
                        filtered_cost = cost[0]
                    except IndexError:
                        filtered_cost = cost
                    all_chains.append((filtered_cost, chained))

            # _logger.debug(all_chains)

            all_chains.sort(key=lambda c: c[0], reverse=True)

            threshold_cost = all_chains[-self.n_c][0]
            _logger.info(
                f"current threshold cost: {threshold_cost}, at P = (1 / {self.n_s})^{i + 1}"
            )
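            # Early exit once the threshold cost has dropped below the requested
            # target: reverse_bisect_right locates the target in the descending cost
            # list, and the neighbouring entries give the bracketing
            # (cost, likelihood) pairs reported in the result.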
            if (self.target_cost is not None) and (threshold_cost < self.target_cost):
                _logger.info(
                    f"got a threshold cost {threshold_cost}, less than {self.target_cost}. will leave early"
                )

                cost_list = [c[0] for c in all_chains]
                over_index = reverse_bisect_right(cost_list, self.target_cost)

                shorter_probs_list = []
                for cost_index, cost_chain in enumerate(all_chains):
                    probs_list.append(
                        (
                            ((self.n_c * self.n_s - cost_index) / (self.n_c * self.n_s))
                            / (self.n_s ** (i)),
                            cost_chain[0],
                            i + 1,
                        )
                    )
                    shorter_probs_list.append(
                        (
                            cost_chain[0],
                            ((self.n_c * self.n_s - cost_index) / (self.n_c * self.n_s))
                            / (self.n_s ** (i)),
                        )
                    )
                # _logger.info(shorter_probs_list)
                result = SubsetSimulationResult(
                    probs_list=probs_list,
                    over_target_cost=shorter_probs_list[over_index - 1][0],
                    over_target_likelihood=shorter_probs_list[over_index - 1][1],
                    under_target_cost=shorter_probs_list[over_index][0],
                    under_target_likelihood=shorter_probs_list[over_index][1],
                )
                return result

            # _logger.debug([c[0] for c in all_chains[-n_c:]])
            _logger.info(f"doing level {i + 1}")

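        # All m_max levels ran without reaching a target cost: record the final
        # population at weight (1 / n_s)^m_max and return without target bracketing.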
        for cost_index, cost_chain in enumerate(all_chains):
            probs_list.append(
                (
                    ((self.n_c * self.n_s - cost_index) / (self.n_c * self.n_s))
                    / (self.n_s ** (self.m_max)),
                    cost_chain[0],
                    self.m_max + 1,
                )
            )
        threshold_cost = all_chains[-self.n_c][0]
        _logger.info(
            f"final threshold cost: {threshold_cost}, at P = (1 / {self.n_s})^{self.m_max + 1}"
        )
        for a in all_chains[-10:]:
            _logger.info(a)
        # for prob, prob_cost in probs_list:
        #     _logger.info(f"\t{prob}: {prob_cost}")
        probs_list.sort(key=lambda c: c[0], reverse=True)
        result = SubsetSimulationResult(
            probs_list=probs_list,
            over_target_cost=None,
            over_target_likelihood=None,
            under_target_cost=None,
            under_target_likelihood=None,
        )
        return result

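    # Per-dipole proposal step sizes for the MCMC chains. With adaptive steps the
    # sizes come from the spread of the current seed population, floored so they
    # cannot collapse to zero and capped (for the log-frequency step) at
    # default_upper_w_log_step; otherwise the fixed defaults are used for every
    # dipole.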
    def get_stdevs_from_arrays(
        self, array
    ) -> pdme.subspace_simulation.MCMCStandardDeviation:
        # stdevs = get_stdevs_from_arrays(next_seeds_as_array, model)
        if self.use_adaptive_steps:
            stdev_array = []
            count = array.shape[1]
            for dipole_index in range(count):
                selected = array[:, dipole_index]
                pxs = selected[:, 0]
                pys = selected[:, 1]
                pzs = selected[:, 2]
                thetas = numpy.arccos(pzs / self.model.pfixed)
                phis = numpy.arctan2(pys, pxs)

                rstdevs = numpy.maximum(
                    numpy.std(selected, axis=0)[3:6],
                    self.default_r_step / (self.n_s * 10),
                )
                frequency_stdevs = numpy.minimum(
                    numpy.maximum(
                        numpy.std(numpy.log(selected[:, -1])),
                        self.default_w_log_step / (self.n_s * 10),
                    ),
                    self.default_upper_w_log_step,
                )
                stdev_array.append(
                    pdme.subspace_simulation.DipoleStandardDeviation(
                        p_theta_step=max(
                            numpy.std(thetas), self.default_theta_step / (self.n_s * 10)
                        ),
                        p_phi_step=max(
                            numpy.std(phis), self.default_phi_step / (self.n_s * 10)
                        ),
                        rx_step=rstdevs[0],
                        ry_step=rstdevs[1],
                        rz_step=rstdevs[2],
                        w_log_step=frequency_stdevs,
                    )
                )
        else:
            default_stdev = pdme.subspace_simulation.DipoleStandardDeviation(
                self.default_phi_step,
                self.default_theta_step,
                self.default_r_step,
                self.default_r_step,
                self.default_r_step,
                self.default_w_log_step,
            )
            stdev_array = [default_stdev]
        stdevs = pdme.subspace_simulation.MCMCStandardDeviation(stdev_array)
        return stdevs


def reverse_bisect_right(a, x, lo=0, hi=None):
    """Return the index at which to insert item x in list a, assuming a is sorted in descending order.

    The return value i is such that all e in a[:i] have e >= x, and all e in
    a[i:] have e < x. So if x already appears in the list, a.insert(i, x) will
    insert just after the rightmost x already there.

    Optional args lo (default 0) and hi (default len(a)) bound the
    slice of a to be searched.

    Essentially, the function returns the number of elements in a which are >= x.

    >>> a = [8, 6, 5, 4, 2]
    >>> reverse_bisect_right(a, 5)
    3
    >>> a[:reverse_bisect_right(a, 5)]
    [8, 6, 5]
    """
    if lo < 0:
        raise ValueError("lo must be non-negative")
    if hi is None:
        hi = len(a)
    while lo < hi:
        mid = (lo + hi) // 2
        if x > a[mid]:
            hi = mid
        else:
            lo = mid + 1
    return lo