Compare commits
17 Commits
b3c3412609...c288e42e5f

| SHA1 |
|---|
| c288e42e5f |
| 9cfd484d7c |
| 09fad2e102 |
| 24ac65bf9c |
| 8fbae32111 |
| b1c01b25c8 |
| a14d9834e5 |
| 8d04803eb3 |
| 92b49fce7c |
| 8845b2875f |
| 72791f2d0f |
| d258cfbec7 |
| b3bf4cde97 |
| 60f29b0b2f |
| 093a3fb5c4 |
| dc1d2d45a3 |
| f0e2fa3da9 |
CHANGELOG.md (+25)
```diff
@@ -2,6 +2,31 @@

 All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.

+## [1.3.0](https://gitea.deepak.science:2222/physics/deepdog/compare/1.2.1...1.3.0) (2024-05-20)
+
+
+### Features
+
+* add multi run to wrap multi model and repeat runs ([92b49fc](https://gitea.deepak.science:2222/physics/deepdog/commit/92b49fce7c86f14484deb1c4aaaa810a6f69c08a))
+* adds a filter that works with cost functions ([8845b28](https://gitea.deepak.science:2222/physics/deepdog/commit/8845b2875f2c91c91dd3988fabda26400c59b2d7))
+* improve initial cost calculation to allow multiprocessing, adds ability to specify a number of levels to do with direct mc instead of subset simulation ([09fad2e](https://gitea.deepak.science:2222/physics/deepdog/commit/09fad2e1024d9237a6a4f7931f51cb4c84b83bf8))
+
+
+### Bug Fixes
+
+* Adds ugly hack for stdevs for this uniform range to multiply by root3, proper fix would be in pdme ([b1c01b2](https://gitea.deepak.science:2222/physics/deepdog/commit/b1c01b25c8f2c3947be23f5b2c656c37437dab17))
+* fix seeding to avoid recreating seed combinations across multi runs ([24ac65b](https://gitea.deepak.science:2222/physics/deepdog/commit/24ac65bf9c74c454fec826ca9de640fe095f5a17))
+
+### [1.2.1](https://gitea.deepak.science:2222/physics/deepdog/compare/1.2.0...1.2.1) (2024-05-12)
+
+## [1.2.0](https://gitea.deepak.science:2222/physics/deepdog/compare/1.1.0...1.2.0) (2024-05-09)
+
+
+### Features
+
+* adds additional matching regexes ([dc1d2d4](https://gitea.deepak.science:2222/physics/deepdog/commit/dc1d2d45a3e631c5efccce80f8a24fa87c6089e0))
+* adds magnitude enabled parsing option ([f0e2fa3](https://gitea.deepak.science:2222/physics/deepdog/commit/f0e2fa3da9f5a5136908d691137a904fda4e3a9a))
+
 ## [1.1.0](https://gitea.deepak.science:2222/physics/deepdog/compare/1.0.1...1.1.0) (2024-05-03)
```
```diff
@@ -72,6 +72,7 @@ def main(args: argparse.Namespace):
         for f in tqdm.tqdm(out_files, desc="reading files", leave=False)
     ]

+    # Refactor here to allow for arbitrary likelihood file sources
     _logger.info("building uncoalesced dict")
     uncoalesced_dict = deepdog.cli.probs.dicts.build_model_dict(parsed_output_files)
```
deepdog/direct_monte_carlo/cost_function_filter.py (new file, +24)
```diff
@@ -0,0 +1,24 @@
+from deepdog.direct_monte_carlo.direct_mc import DirectMonteCarloFilter
+from typing import Callable
+import numpy
+
+
+class CostFunctionTargetFilter(DirectMonteCarloFilter):
+    def __init__(
+        self,
+        cost_function: Callable[[numpy.ndarray], numpy.ndarray],
+        target_cost: float,
+    ):
+        """
+        Filters dipoles by cost, only leaving dipoles with cost below target_cost
+        """
+        self.cost_function = cost_function
+        self.target_cost = target_cost
+
+    def filter_samples(self, samples: numpy.ndarray) -> numpy.ndarray:
+        current_sample = samples
+
+        costs = self.cost_function(current_sample)
+
+        current_sample = current_sample[costs < self.target_cost]
+        return current_sample
```
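Note: `filter_samples` relies on numpy boolean masking, so the cost function must return one cost per sample. A minimal standalone sketch of that idiom with made-up toy values (not part of the diff; the real class is exercised by the test file at the bottom of this compare):

```python
import numpy

# Hypothetical toy data: three samples and their (pretend) costs.
samples = numpy.array([[1.0], [5.0], [0.2]])
costs = numpy.array([0.9, 3.0, 0.1])  # stand-in for cost_function(samples)
target_cost = 1.0

# Same idiom as filter_samples: the boolean mask keeps rows whose cost is under target.
kept = samples[costs < target_cost]
assert kept.shape == (2, 1)  # the 5.0 row is dropped
```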
```diff
@@ -31,10 +31,10 @@ class Indexifier:

    def __init__(self, list_dict: typing.Dict[str, typing.Sequence]):
        self.dict = list_dict
+       self.product_dict = _dict_product(self.dict)

    def indexify(self, n: int) -> typing.Dict[str, typing.Any]:
-       product_dict = _dict_product(self.dict)
-       return product_dict[n]
+       return self.product_dict[n]

    def _indexify_indices(self, n: int) -> typing.Sequence[int]:
        """
```
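Note: this change memoizes the product once in `__init__`, so repeated `indexify(n)` calls become list lookups instead of recomputing the full cartesian product each time. A hedged sketch of the idea, assuming `_dict_product` (not shown in this diff) enumerates the cartesian product of the value sequences as a list of dicts:

```python
import itertools
import typing


# Assumed behavior of _dict_product, for illustration only.
def _dict_product(list_dict: typing.Dict[str, typing.Sequence]) -> typing.List[dict]:
    keys = list(list_dict.keys())
    return [
        dict(zip(keys, combo))
        for combo in itertools.product(*(list_dict[k] for k in keys))
    ]


products = _dict_product({"a": [1, 2], "b": ["x", "y"]})
assert products[1] == {"a": 1, "b": "y"}
# Computing this list once up front is what the cached self.product_dict buys.
```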
```diff
@@ -8,17 +8,30 @@ import csv

 _logger = logging.getLogger(__name__)

-FILENAME_REGEX = r"(?P<timestamp>\d{8}-\d{6})-(?P<filename_slug>.*)\.realdata\.fast_filter\.bayesrun\.csv"
+FILENAME_REGEX = re.compile(
+    r"(?P<timestamp>\d{8}-\d{6})-(?P<filename_slug>.*)\.realdata\.fast_filter\.bayesrun\.csv"
+)

 MODEL_REGEXES = [
-    r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)"
+    re.compile(pattern)
+    for pattern in [
+        r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
+        r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
+        r"geom_(?P<xmin>-?\d*\.?\d+)_(?P<xmax>-?\d*\.?\d+)_(?P<ymin>-?\d*\.?\d+)_(?P<ymax>-?\d*\.?\d+)_(?P<zmin>-?\d*\.?\d+)_(?P<zmax>-?\d*\.?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
+    ]
 ]

 FILE_SLUG_REGEXES = [
-    r"mock_tarucha-(?P<job_index>\d+)",
-    r"(?:(?P<mock>mock)_)?tarucha(?:_(?P<tarucha_run_id>\d+))?-(?P<job_index>\d+)",
+    re.compile(pattern)
+    for pattern in [
+        r"(?P<tag>\w+)-(?P<job_index>\d+)",
+        r"mock_tarucha-(?P<job_index>\d+)",
+        r"(?:(?P<mock>mock)_)?tarucha(?:_(?P<tarucha_run_id>\d+))?-(?P<job_index>\d+)",
+    ]
 ]
+
+SIMPLE_TAG_REGEX = re.compile(r"\w+-\d+")


 @dataclasses.dataclass
 class BayesrunOutputFilename:
```
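Note: wrapping the patterns in `re.compile` at module import time means later calls use the compiled pattern objects directly (`pattern.match(...)` below) instead of `re.match(pattern, ...)`, which re-resolves the pattern through `re`'s internal cache on every call. The two forms match identically:

```python
import re

# One of the new slug patterns from this diff, precompiled once.
SLUG = re.compile(r"(?P<tag>\w+)-(?P<job_index>\d+)")

m = SLUG.match("sweep-17")
assert m is not None
assert m.groupdict() == {"tag": "sweep", "job_index": "17"}
# Equivalent to the pre-change call style:
assert re.match(r"(?P<tag>\w+)-(?P<job_index>\d+)", "sweep-17").groupdict() == m.groupdict()
```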
```diff
@@ -27,7 +40,6 @@ class BayesrunOutputFilename:
     path: pathlib.Path

-
 @dataclasses.dataclass
 class BayesrunColumnParsed:
     """
     class for parsing a bayesrun while pulling certain special fields out
```
```diff
@@ -38,10 +50,21 @@ class BayesrunColumnParsed:
         self.model_field_dict = {
             k: v for k, v in groupdict.items() if k != "field_name"
         }
+        self._groupdict_str = repr(groupdict)

     def __str__(self):
         return f"BayesrunColumnParsed[{self.column_field}: {self.model_field_dict}]"

+    def __repr__(self):
+        return f"BayesrunColumnParsed({self._groupdict_str})"
+
+    def __eq__(self, other):
+        if isinstance(other, BayesrunColumnParsed):
+            return (self.column_field == other.column_field) and (
+                self.model_field_dict == other.model_field_dict
+            )
+        return NotImplemented
+

 @dataclasses.dataclass
 class BayesrunModelResult:
```
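Note: the new `__eq__` compares only the parsed fields and returns `NotImplemented` for foreign types, letting Python fall back to its default comparison; this is what allows the plain `assert parsed == expected` assertions in the test hunks later in this compare. A small sketch (construction from a groupdict follows the tests below; toy values):

```python
import deepdog.results

a = deepdog.results.BayesrunColumnParsed({"xmin": "-20", "field_name": "success"})
b = deepdog.results.BayesrunColumnParsed({"xmin": "-20", "field_name": "success"})
assert a == b                        # same column field, same remaining model fields
assert (a == "not parsed") is False  # NotImplemented falls back to default equality
```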
```diff
@@ -73,7 +96,7 @@ def _parse_bayesrun_column(
     Returns the groupdict for the first match, or None if no match found.
     """
     for pattern in MODEL_REGEXES:
-        match = re.match(pattern, column)
+        match = pattern.match(column)
         if match:
             return BayesrunColumnParsed(match.groupdict())
         else:
```
```diff
@@ -112,7 +135,7 @@ def _parse_bayesrun_row(

 def _parse_output_filename(file: pathlib.Path) -> BayesrunOutputFilename:
     filename = file.name
-    match = re.match(FILENAME_REGEX, filename)
+    match = FILENAME_REGEX.match(filename)
     if not match:
         raise ValueError(f"{filename} was not a valid bayesrun output")
     groups = match.groupdict()
```
```diff
@@ -123,7 +146,7 @@ def _parse_output_filename(file: pathlib.Path) -> BayesrunOutputFilename:

 def _parse_file_slug(slug: str) -> typing.Optional[typing.Dict[str, str]]:
     for pattern in FILE_SLUG_REGEXES:
-        match = re.match(pattern, slug)
+        match = pattern.match(slug)
         if match:
             return match.groupdict()
         else:
```
```diff
@@ -1,9 +1,11 @@
 import logging
+import multiprocessing
+import numpy
 import pdme.measurement
 import pdme.measurement.input_types
 import pdme.model
 import pdme.subspace_simulation
-from typing import Sequence, Tuple, Optional
+from typing import Sequence, Tuple, Optional, Callable, Union, List

 from dataclasses import dataclass
```
```diff
@@ -18,47 +20,63 @@ class SubsetSimulationResult:
     under_target_cost: Optional[float]
     under_target_likelihood: Optional[float]
     lowest_likelihood: Optional[float]
+    messages: Sequence[str]
+
+
+@dataclass
+class MultiSubsetSimulationResult:
+    child_results: Sequence[SubsetSimulationResult]
+    model_name: str
+    estimated_likelihood: float
+    arithmetic_mean_estimated_likelihood: float
+    num_children: int
+    num_finished_children: int
+    clean_estimate: bool


 class SubsetSimulation:
     def __init__(
         self,
         model_name_pair,
-        dot_inputs,
-        actual_measurements: Sequence[pdme.measurement.DotMeasurement],
+        # actual_measurements: Sequence[pdme.measurement.DotMeasurement],
+        cost_function: Callable[[numpy.ndarray], numpy.ndarray],
         n_c: int,
         n_s: int,
         m_max: int,
         target_cost: Optional[float] = None,
-        level_0_seed: int = 200,
-        mcmc_seed: int = 20,
+        level_0_seed: Union[int, Sequence[int]] = 200,
+        mcmc_seed: Union[int, Sequence[int]] = 20,
         use_adaptive_steps=True,
         default_phi_step=0.01,
         default_theta_step=0.01,
         default_r_step=0.01,
         default_w_log_step=0.01,
         default_upper_w_log_step=4,
+        num_initial_dmc_gens=1,
         keep_probs_list=True,
         dump_last_generation_to_file=False,
         initial_cost_chunk_size=100,
+        initial_cost_multiprocess=True,
+        cap_core_count: int = 0,  # 0 means cap at num cores - 1
     ):
         name, model = model_name_pair
         self.model_name = name
         self.model = model
         _logger.info(f"got model {self.model_name}")

-        self.dot_inputs_array = pdme.measurement.input_types.dot_inputs_to_array(
-            dot_inputs
-        )
+        # dot_inputs = [(meas.r, meas.f) for meas in actual_measurements]
+        # self.dot_inputs_array = pdme.measurement.input_types.dot_inputs_to_array(
+        #     dot_inputs
+        # )
+        # _logger.debug(f"actual measurements: {actual_measurements}")
-        self.actual_measurement_array = numpy.array([m.v for m in actual_measurements])
+        # self.actual_measurement_array = numpy.array([m.v for m in actual_measurements])

-        def cost_function_to_use(dipoles_to_test):
-            return pdme.subspace_simulation.proportional_costs_vs_actual_measurement(
-                self.dot_inputs_array, self.actual_measurement_array, dipoles_to_test
-            )
+        # def cost_function_to_use(dipoles_to_test):
+        #     return pdme.subspace_simulation.proportional_costs_vs_actual_measurement(
+        #         self.dot_inputs_array, self.actual_measurement_array, dipoles_to_test
+        #     )

-        self.cost_function_to_use = cost_function_to_use
+        self.cost_function_to_use = cost_function

         self.n_c = n_c
         self.n_s = n_s
```
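Note: `level_0_seed` and `mcmc_seed` are widened to `Union[int, Sequence[int]]` because numpy's seeding accepts a sequence of ints as entropy; `MultiSubsetSimulations` later in this diff exploits that by seeding each child run with `[model_index, run_index, seed_seed]` so runs cannot reuse seed combinations. Quick illustration:

```python
import numpy

# A list of ints is a valid seed; distinct lists give independent, reproducible streams.
rng_a = numpy.random.default_rng([0, 1, 200])
rng_b = numpy.random.default_rng([0, 2, 200])
print(rng_a.random(), rng_b.random())  # different values, each reproducible
```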
```diff
@@ -68,16 +86,25 @@ class SubsetSimulation:
         self.mcmc_seed = mcmc_seed

         self.use_adaptive_steps = use_adaptive_steps
-        self.default_phi_step = default_phi_step
+        self.default_phi_step = (
+            default_phi_step * 1.73
+        )  # this is a hack to fix a missing sqrt 3 in the proposal function code.
         self.default_theta_step = default_theta_step
-        self.default_r_step = default_r_step
-        self.default_w_log_step = default_w_log_step
+        self.default_r_step = (
+            default_r_step * 1.73
+        )  # this is a hack to fix a missing sqrt 3 in the proposal function code.
+        self.default_w_log_step = (
+            default_w_log_step * 1.73
+        )  # this is a hack to fix a missing sqrt 3 in the proposal function code.
         self.default_upper_w_log_step = default_upper_w_log_step

         _logger.info("using params:")
         _logger.info(f"\tn_c: {self.n_c}")
         _logger.info(f"\tn_s: {self.n_s}")
         _logger.info(f"\tm: {self.m_max}")
+        _logger.info(f"\t{num_initial_dmc_gens=}")
+        _logger.info(f"\t{mcmc_seed=}")
+        _logger.info(f"\t{level_0_seed=}")
         _logger.info("let's do level 0...")

         self.target_cost = target_cost
```
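Note on the 1.73 factor: for a uniform distribution on `[-a, a]` the variance is `a**2 / 3`, so the standard deviation is `a / sqrt(3)`. A step size derived from such a stdev therefore needs a factor of `sqrt(3) ≈ 1.73` to recover the half-width scale, which is the "root3" hack the changelog flags as pending a proper fix in pdme. Quick numerical check:

```python
import numpy

a = 2.0
samples = numpy.random.default_rng(0).uniform(-a, a, 1_000_000)
print(samples.std())      # ≈ 1.1547
print(a / numpy.sqrt(3))  # 1.1547..., so stdev * 1.73 ≈ a
```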
```diff
@@ -87,158 +114,176 @@ class SubsetSimulation:
         self.dump_last_generations = dump_last_generation_to_file

         self.initial_cost_chunk_size = initial_cost_chunk_size
+        self.initial_cost_multiprocess = initial_cost_multiprocess
+
+        self.cap_core_count = cap_core_count
+
+        self.num_dmc_gens = num_initial_dmc_gens
+
+    def _single_chain_gen(self, args: Tuple):
+        threshold_cost, stdevs, rng_seed, (c, s) = args
+        rng = numpy.random.default_rng(rng_seed)
+        return self.model.get_repeat_counting_mcmc_chain(
+            s,
+            self.cost_function_to_use,
+            self.n_s,
+            threshold_cost,
+            stdevs,
+            initial_cost=c,
+            rng_arg=rng,
+        )

     def execute(self) -> SubsetSimulationResult:

         probs_list = []
+        output_messages = []

+        # If we have n_s = 10 and n_c = 100, then our big N = 1000 and p = 1/10
+        # The DMC stage would normally generate 1000, then pick the best 100 and start counting prob = p/10.
+        # Let's say we want our DMC stage to go down to level 2.
+        # Then we need to filter out p^2, so our initial has to be N_0 = N / p = n_c * n_s^2
+        initial_dmc_n = self.n_c * (self.n_s**self.num_dmc_gens)
+        initial_level = (
+            self.num_dmc_gens - 1
+        )  # This is perfunctory but let's label it here really explicitly
+        _logger.info(f"Generating {initial_dmc_n} for DMC stage")
         sample_dipoles = self.model.get_monte_carlo_dipole_inputs(
-            self.n_c * self.n_s,
+            initial_dmc_n,
             -1,
             rng_to_use=numpy.random.default_rng(self.level_0_seed),
         )
         # _logger.debug(sample_dipoles)
         # _logger.debug(sample_dipoles.shape)

-        raw_costs = []
         _logger.debug("Finished dipole generation")
         _logger.debug(
-            f"Using iterated cost function thing with chunk size {self.initial_cost_chunk_size}"
+            f"Using iterated multiprocessing cost function thing with chunk size {self.initial_cost_chunk_size}"
         )

-        for x in range(0, len(sample_dipoles), self.initial_cost_chunk_size):
-            _logger.debug(f"doing chunk {x}")
-            raw_costs.extend(
-                self.cost_function_to_use(
-                    sample_dipoles[x : x + self.initial_cost_chunk_size]
-                )
-            )
+        # core count etc. logic here
+        core_count = multiprocessing.cpu_count() - 1 or 1
+        if (self.cap_core_count >= 1) and (self.cap_core_count < core_count):
+            core_count = self.cap_core_count
+        _logger.info(f"Using {core_count} cores")

+        with multiprocessing.Pool(core_count) as pool:
+
+            # Do the initial DMC calculation in a multiprocessing
+
+            chunks = numpy.array_split(
+                sample_dipoles,
+                range(
+                    self.initial_cost_chunk_size,
+                    len(sample_dipoles),
+                    self.initial_cost_chunk_size,
+                ),
+            )
-        costs = numpy.array(raw_costs)
+            if self.initial_cost_multiprocess:
+                _logger.debug("Multiprocessing initial costs")
+                raw_costs = pool.map(self.cost_function_to_use, chunks)
+            else:
+                _logger.debug("Single process initial costs")
+                raw_costs = []
+                for chunk_idx, chunk in enumerate(chunks):
+                    _logger.debug(f"doing chunk #{chunk_idx}")
+                    raw_costs.append(self.cost_function_to_use(chunk))
+            costs = numpy.concatenate(raw_costs)
+            _logger.debug("finished initial dmc cost calculation")
+            # _logger.debug(f"costs: {costs}")
+            sorted_indexes = costs.argsort()[::-1]
-        _logger.debug(f"costs: {costs}")
-        sorted_indexes = costs.argsort()[::-1]
+            # _logger.debug(costs[sorted_indexes])
+            # _logger.debug(sample_dipoles[sorted_indexes])
-        _logger.debug(costs[sorted_indexes])
-        _logger.debug(sample_dipoles[sorted_indexes])
+            sorted_costs = costs[sorted_indexes]
+            sorted_dipoles = sample_dipoles[sorted_indexes]
-        sorted_costs = costs[sorted_indexes]
-        sorted_dipoles = sample_dipoles[sorted_indexes]

-        threshold_cost = sorted_costs[-self.n_c]

-        all_dipoles = numpy.array(
-            [
-                pdme.subspace_simulation.sort_array_of_dipoles_by_frequency(samp)
-                for samp in sorted_dipoles
-            ]
-        )
-        all_chains = list(zip(sorted_costs, all_dipoles))

-        mcmc_rng = numpy.random.default_rng(self.mcmc_seed)

-        for i in range(self.m_max):
-            next_seeds = all_chains[-self.n_c :]

-            if self.dump_last_generations:
-                _logger.info("writing out csv file")
-                next_dipoles_seed_dipoles = numpy.array([n[1] for n in next_seeds])
-                for n in range(self.model.n):
-                    _logger.info(f"{next_dipoles_seed_dipoles[:, n].shape}")
-                    numpy.savetxt(
-                        f"generation_{self.n_c}_{self.n_s}_{i}_dipole_{n}.csv",
-                        next_dipoles_seed_dipoles[:, n],
-                        delimiter=",",
-                    )

-                next_seeds_as_array = numpy.array([s for _, s in next_seeds])
-                stdevs = self.get_stdevs_from_arrays(next_seeds_as_array)
-                _logger.info(f"got stdevs: {stdevs.stdevs}")
-                all_long_chains = []
-                for seed_index, (c, s) in enumerate(
-                    next_seeds[:: len(next_seeds) // 20]
-                ):
-                    # chain = mcmc(s, threshold_cost, n_s, model, dot_inputs_array, actual_measurement_array, mcmc_rng, curr_cost=c, stdevs=stdevs)
-                    # until new version gotta do
-                    _logger.debug(f"\t{seed_index}: doing long chain on the next seed")

-                    long_chain = self.model.get_mcmc_chain(
-                        s,
-                        self.cost_function_to_use,
-                        1000,
-                        threshold_cost,
-                        stdevs,
-                        initial_cost=c,
-                        rng_arg=mcmc_rng,
-                    )
-                    for _, chained in long_chain:
-                        all_long_chains.append(chained)
-                all_long_chains_array = numpy.array(all_long_chains)
-                for n in range(self.model.n):
-                    _logger.info(f"{all_long_chains_array[:, n].shape}")
-                    numpy.savetxt(
-                        f"long_chain_generation_{self.n_c}_{self.n_s}_{i}_dipole_{n}.csv",
-                        all_long_chains_array[:, n],
-                        delimiter=",",
-                    )

-            if self.keep_probs_list:
-                for cost_index, cost_chain in enumerate(all_chains[: -self.n_c]):
-                    probs_list.append(
-                        (
-                            ((self.n_c * self.n_s - cost_index) / (self.n_c * self.n_s))
-                            / (self.n_s ** (i)),
-                            cost_chain[0],
-                            i + 1,
-                        )
-                    )

-            next_seeds_as_array = numpy.array([s for _, s in next_seeds])

-            stdevs = self.get_stdevs_from_arrays(next_seeds_as_array)
-            _logger.info(f"got stdevs: {stdevs.stdevs}")
-            _logger.debug("Starting the MCMC")
-            all_chains = []
-            for seed_index, (c, s) in enumerate(next_seeds):
-                # chain = mcmc(s, threshold_cost, n_s, model, dot_inputs_array, actual_measurement_array, mcmc_rng, curr_cost=c, stdevs=stdevs)
-                # until new version gotta do
+            all_dipoles = numpy.array(
+                [
+                    pdme.subspace_simulation.sort_array_of_dipoles_by_frequency(samp)
+                    for samp in sorted_dipoles
+                ]
+            )
+            all_chains = list(zip(sorted_costs, all_dipoles))
+            for dmc_level in range(initial_level):
+                # if initial level is 1, we want to print out what the level 0 threshold would have been?
+                _logger.debug(f"Get the pseudo statistics for level {dmc_level}")
+                _logger.debug(f"Whole chain has length {len(all_chains)}")
+                pseudo_threshold_index = -(
+                    self.n_c * (self.n_s ** (self.num_dmc_gens - dmc_level - 1))
+                )
                 _logger.debug(
-                    f"\t{seed_index}: getting another chain from the next seed"
+                    f"Have a pseudo_threshold_index of {pseudo_threshold_index}, or {len(all_chains) + pseudo_threshold_index}"
                 )
-                chain = self.model.get_mcmc_chain(
-                    s,
-                    self.cost_function_to_use,
-                    self.n_s,
-                    threshold_cost,
-                    stdevs,
-                    initial_cost=c,
-                    rng_arg=mcmc_rng,
+                pseudo_threshold_cost = all_chains[-pseudo_threshold_index][0]
+                _logger.info(
+                    f"Pseudo-level {dmc_level} threshold cost {pseudo_threshold_cost}, at P = (1 / {self.n_s})^{dmc_level + 1}"
                 )
-                for cost, chained in chain:
-                    try:
-                        filtered_cost = cost[0]
-                    except (IndexError, TypeError):
-                        filtered_cost = cost
-                    all_chains.append((filtered_cost, chained))
-            _logger.debug("finished mcmc")
-            # _logger.debug(all_chains)
+                all_chains = all_chains[pseudo_threshold_index:]

-            all_chains.sort(key=lambda c: c[0], reverse=True)
-            _logger.debug("finished sorting all_chains")
+            long_mcmc_rng = numpy.random.default_rng(self.mcmc_seed)
+            mcmc_rng_seed_sequence = numpy.random.SeedSequence(self.mcmc_seed)

             threshold_cost = all_chains[-self.n_c][0]
             _logger.info(
-                f"current threshold cost: {threshold_cost}, at P = (1 / {self.n_s})^{i + 1}"
+                f"Finishing DMC threshold cost {threshold_cost} at level {initial_level}, at P = (1 / {self.n_s})^{initial_level + 1}"
             )
-            if (self.target_cost is not None) and (threshold_cost < self.target_cost):
-                _logger.info(
-                    f"got a threshold cost {threshold_cost}, less than {self.target_cost}. will leave early"
-                )
+            _logger.debug(f"Executing the MCMC with chains of length {len(all_chains)}")

-                cost_list = [c[0] for c in all_chains]
-                over_index = reverse_bisect_right(cost_list, self.target_cost)
+            # Now we move on to the MCMC part of the algorithm

-                shorter_probs_list = []
-                for cost_index, cost_chain in enumerate(all_chains):
-                    if self.keep_probs_list:
+            # This is important, we want to allow some extra initial levels so we need to account for that here!
+            for i in range(self.num_dmc_gens, self.m_max):
+                _logger.info(f"Starting level {i}")
+                next_seeds = all_chains[-self.n_c :]

+                if self.dump_last_generations:
+                    _logger.info("writing out csv file")
+                    next_dipoles_seed_dipoles = numpy.array([n[1] for n in next_seeds])
+                    for n in range(self.model.n):
+                        _logger.info(f"{next_dipoles_seed_dipoles[:, n].shape}")
+                        numpy.savetxt(
+                            f"generation_{self.n_c}_{self.n_s}_{i}_dipole_{n}.csv",
+                            next_dipoles_seed_dipoles[:, n],
+                            delimiter=",",
+                        )

+                    next_seeds_as_array = numpy.array([s for _, s in next_seeds])
+                    stdevs = self.get_stdevs_from_arrays(next_seeds_as_array)
+                    _logger.info(f"got stdevs: {stdevs.stdevs}")
+                    all_long_chains = []
+                    for seed_index, (c, s) in enumerate(
+                        next_seeds[:: len(next_seeds) // 20]
+                    ):
+                        # chain = mcmc(s, threshold_cost, n_s, model, dot_inputs_array, actual_measurement_array, mcmc_rng, curr_cost=c, stdevs=stdevs)
+                        # until new version gotta do
+                        _logger.debug(
+                            f"\t{seed_index}: doing long chain on the next seed"
+                        )

+                        long_chain = self.model.get_mcmc_chain(
+                            s,
+                            self.cost_function_to_use,
+                            1000,
+                            threshold_cost,
+                            stdevs,
+                            initial_cost=c,
+                            rng_arg=long_mcmc_rng,
+                        )
+                        for _, chained in long_chain:
+                            all_long_chains.append(chained)
+                    all_long_chains_array = numpy.array(all_long_chains)
+                    for n in range(self.model.n):
+                        _logger.info(f"{all_long_chains_array[:, n].shape}")
+                        numpy.savetxt(
+                            f"long_chain_generation_{self.n_c}_{self.n_s}_{i}_dipole_{n}.csv",
+                            all_long_chains_array[:, n],
+                            delimiter=",",
+                        )

+                if self.keep_probs_list:
+                    for cost_index, cost_chain in enumerate(all_chains[: -self.n_c]):
+                        probs_list.append(
+                            (
+                                (
```
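Note: the rewritten initial stage above chunks the candidate dipoles with `numpy.array_split` and maps the cost function over a `multiprocessing.Pool`. A self-contained sketch of just that pattern, with a toy cost function standing in for `cost_function_to_use`:

```python
import multiprocessing

import numpy


def toy_cost(chunk: numpy.ndarray) -> numpy.ndarray:
    # Stand-in for the real cost function: one cost per row.
    return numpy.abs(chunk).sum(axis=1)


if __name__ == "__main__":
    samples = numpy.arange(20.0).reshape(10, 2)
    chunk_size = 3
    # Split at multiples of chunk_size, exactly as in the diff.
    chunks = numpy.array_split(samples, range(chunk_size, len(samples), chunk_size))
    core_count = multiprocessing.cpu_count() - 1 or 1
    with multiprocessing.Pool(core_count) as pool:
        raw_costs = pool.map(toy_cost, chunks)
    costs = numpy.concatenate(raw_costs)
    assert costs.shape == (10,)
```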
```diff
@@ -250,26 +295,105 @@ class SubsetSimulation:
                             i + 1,
                         )
                     )
-                        shorter_probs_list.append(
-                            (
-                                cost_chain[0],
-                                ((self.n_c * self.n_s - cost_index) / (self.n_c * self.n_s))
-                                / (self.n_s ** (i)),
-                            )
-                        )
-                # _logger.info(shorter_probs_list)
-                result = SubsetSimulationResult(
-                    probs_list=probs_list,
-                    over_target_cost=shorter_probs_list[over_index - 1][0],
-                    over_target_likelihood=shorter_probs_list[over_index - 1][1],
-                    under_target_cost=shorter_probs_list[over_index][0],
-                    under_target_likelihood=shorter_probs_list[over_index][1],
-                    lowest_likelihood=shorter_probs_list[-1][1],
-                )
-                return result

-            # _logger.debug([c[0] for c in all_chains[-n_c:]])
-            _logger.info(f"doing level {i + 1}")
+                next_seeds_as_array = numpy.array([s for _, s in next_seeds])

+                stdevs = self.get_stdevs_from_arrays(next_seeds_as_array)
+                _logger.debug(f"got stdevs, begin: {stdevs.stdevs[:10]}")
+                _logger.debug("Starting the MCMC")
+                all_chains = []

+                seeds = mcmc_rng_seed_sequence.spawn(len(next_seeds))
+                pool_results = pool.imap_unordered(
+                    self._single_chain_gen,
+                    [
+                        (threshold_cost, stdevs, rng_seed, test_seed)
+                        for rng_seed, test_seed in zip(seeds, next_seeds)
+                    ],
+                    chunksize=50,
+                )

+                # count for ergodicity analysis
+                samples_generated = 0
+                samples_rejected = 0

+                for rejected_count, chain in pool_results:
+                    for cost, chained in chain:
+                        try:
+                            filtered_cost = cost[0]
+                        except (IndexError, TypeError):
+                            filtered_cost = cost
+                        all_chains.append((filtered_cost, chained))

+                    samples_generated += self.n_s
+                    samples_rejected += rejected_count

+                _logger.debug("finished mcmc")
+                _logger.debug(f"{samples_rejected=} out of {samples_generated=}")
+                if samples_rejected * 2 > samples_generated:
+                    reject_ratio = samples_rejected / samples_generated
+                    rejectionmessage = f"On level {i}, rejected {samples_rejected} out of {samples_generated}, {reject_ratio=} is too high and may indicate ergodicity problems"
+                    output_messages.append(rejectionmessage)
+                    _logger.warning(rejectionmessage)
+                # _logger.debug(all_chains)

+                all_chains.sort(key=lambda c: c[0], reverse=True)
+                _logger.debug("finished sorting all_chains")

+                threshold_cost = all_chains[-self.n_c][0]
+                _logger.info(
+                    f"current threshold cost: {threshold_cost}, at P = (1 / {self.n_s})^{i + 1}"
+                )
+                if (self.target_cost is not None) and (
+                    threshold_cost < self.target_cost
+                ):
+                    _logger.info(
+                        f"got a threshold cost {threshold_cost}, less than {self.target_cost}. will leave early"
+                    )

+                    cost_list = [c[0] for c in all_chains]
+                    over_index = reverse_bisect_right(cost_list, self.target_cost)

+                    winner = all_chains[over_index][1]
+                    _logger.info(f"Winner obtained: {winner}")
+                    shorter_probs_list = []
+                    for cost_index, cost_chain in enumerate(all_chains):
+                        if self.keep_probs_list:
+                            probs_list.append(
+                                (
+                                    (
+                                        (self.n_c * self.n_s - cost_index)
+                                        / (self.n_c * self.n_s)
+                                    )
+                                    / (self.n_s ** (i)),
+                                    cost_chain[0],
+                                    i + 1,
+                                )
+                            )
+                        shorter_probs_list.append(
+                            (
+                                cost_chain[0],
+                                (
+                                    (self.n_c * self.n_s - cost_index)
+                                    / (self.n_c * self.n_s)
+                                )
+                                / (self.n_s ** (i)),
+                            )
+                        )
+                    # _logger.info(shorter_probs_list)
+                    result = SubsetSimulationResult(
+                        probs_list=probs_list,
+                        over_target_cost=shorter_probs_list[over_index - 1][0],
+                        over_target_likelihood=shorter_probs_list[over_index - 1][1],
+                        under_target_cost=shorter_probs_list[over_index][0],
+                        under_target_likelihood=shorter_probs_list[over_index][1],
+                        lowest_likelihood=shorter_probs_list[-1][1],
+                        messages=output_messages,
+                    )
+                    return result

+                # _logger.debug([c[0] for c in all_chains[-n_c:]])
+                _logger.info(f"doing level {i + 1}")

         if self.keep_probs_list:
             for cost_index, cost_chain in enumerate(all_chains):
```
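Note on the likelihood bookkeeping above: each subset-simulation level conditions on the best `1/n_s` fraction of samples, so a sample at descending-cost rank `cost_index` on level `i` gets estimated exceedance probability `((n_c * n_s - cost_index) / (n_c * n_s)) / n_s**i`. Worked instance:

```python
n_c, n_s = 100, 10   # 1000 samples per level, keep the best 100
i = 2                # two conditioning levels already applied
cost_index = 400     # rank within the current level's descending-cost ordering

level_fraction = (n_c * n_s - cost_index) / (n_c * n_s)  # 0.6 of this level's samples
estimate = level_fraction / (n_s ** i)                   # each earlier level contributes 1/n_s
assert abs(estimate - 0.006) < 1e-12
```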
```diff
@@ -285,8 +409,8 @@ class SubsetSimulation:
         _logger.info(
             f"final threshold cost: {threshold_cost}, at P = (1 / {self.n_s})^{self.m_max + 1}"
         )
-        for a in all_chains[-10:]:
-            _logger.info(a)
+        # for a in all_chains[-10:]:
+        #     _logger.info(a)
         # for prob, prob_cost in probs_list:
         #     _logger.info(f"\t{prob}: {prob_cost}")
         probs_list.sort(key=lambda c: c[0], reverse=True)
```
```diff
@@ -300,6 +424,7 @@ class SubsetSimulation:
             under_target_cost=None,
             under_target_likelihood=None,
             lowest_likelihood=min_likelihood,
+            messages=output_messages,
         )
         return result
```
```diff
@@ -358,6 +483,116 @@ class SubsetSimulation:
         return stdevs


+class MultiSubsetSimulations:
+    def __init__(
+        self,
+        model_name_pairs: Sequence[Tuple[str, pdme.model.DipoleModel]],
+        # actual_measurements: Sequence[pdme.measurement.DotMeasurement],
+        cost_function: Callable[[numpy.ndarray], numpy.ndarray],
+        num_runs: int,
+        n_c: int,
+        n_s: int,
+        m_max: int,
+        target_cost: float,
+        num_initial_dmc_gens: int = 1,
+        level_0_seed_seed: int = 200,
+        mcmc_seed_seed: int = 20,
+        use_adaptive_steps=True,
+        default_phi_step=0.01,
+        default_theta_step=0.01,
+        default_r_step=0.01,
+        default_w_log_step=0.01,
+        default_upper_w_log_step=4,
+        initial_cost_chunk_size=100,
+        cap_core_count: int = 0,  # 0 means cap at num cores - 1
+    ):
+        self.model_name_pairs = model_name_pairs
+        self.cost_function = cost_function
+        self.num_runs = num_runs
+        self.n_c = n_c
+        self.n_s = n_s
+        self.m_max = m_max
+        self.target_cost = target_cost  # This is not optional here!
+
+        self.num_dmc_gens = num_initial_dmc_gens
+
+        self.level_0_seed_seed = level_0_seed_seed
+        self.mcmc_seed_seed = mcmc_seed_seed
+
+        self.use_adaptive_steps = use_adaptive_steps
+        self.default_phi_step = default_phi_step
+        self.default_theta_step = default_theta_step
+        self.default_r_step = default_r_step
+        self.default_w_log_step = default_w_log_step
+        self.default_upper_w_log_step = default_upper_w_log_step
+        self.initial_cost_chunk_size = initial_cost_chunk_size
+        self.cap_core_count = cap_core_count
+
+    def execute(self) -> Sequence[MultiSubsetSimulationResult]:
+        output: List[MultiSubsetSimulationResult] = []
+        for model_index, model_name_pair in enumerate(self.model_name_pairs):
+            ss_results = [
+                SubsetSimulation(
+                    model_name_pair,
+                    self.cost_function,
+                    self.n_c,
+                    self.n_s,
+                    self.m_max,
+                    self.target_cost,
+                    num_initial_dmc_gens=self.num_dmc_gens,
+                    level_0_seed=[model_index, run_index, self.level_0_seed_seed],
+                    mcmc_seed=[model_index, run_index, self.mcmc_seed_seed],
+                    use_adaptive_steps=self.use_adaptive_steps,
+                    default_phi_step=self.default_phi_step,
+                    default_theta_step=self.default_theta_step,
+                    default_r_step=self.default_r_step,
+                    default_w_log_step=self.default_w_log_step,
+                    default_upper_w_log_step=self.default_upper_w_log_step,
+                    keep_probs_list=False,
+                    dump_last_generation_to_file=False,
+                    initial_cost_chunk_size=self.initial_cost_chunk_size,
+                    cap_core_count=self.cap_core_count,
+                ).execute()
+                for run_index in range(self.num_runs)
+            ]
+            output.append(coalesce_ss_results(model_name_pair[0], ss_results))
+        return output
+
+
+def coalesce_ss_results(
+    model_name: str, results: Sequence[SubsetSimulationResult]
+) -> MultiSubsetSimulationResult:
+
+    num_finished = sum(1 for res in results if res.under_target_likelihood is not None)
+
+    estimated_likelihoods = numpy.array(
+        [
+            res.under_target_likelihood
+            if res.under_target_likelihood is not None
+            else res.lowest_likelihood
+            for res in results
+        ]
+    )
+
+    _logger.info(estimated_likelihoods)
+    geometric_mean_estimated_likelihoods = numpy.exp(
+        numpy.log(estimated_likelihoods).mean()
+    )
+    _logger.info(geometric_mean_estimated_likelihoods)
+    arithmetic_mean_estimated_likelihoods = estimated_likelihoods.mean()
+
+    result = MultiSubsetSimulationResult(
+        child_results=results,
+        model_name=model_name,
+        estimated_likelihood=geometric_mean_estimated_likelihoods,
+        arithmetic_mean_estimated_likelihood=arithmetic_mean_estimated_likelihoods,
+        num_children=len(results),
+        num_finished_children=num_finished,
+        clean_estimate=num_finished == len(results),
+    )
+    return result


 def reverse_bisect_right(a, x, lo=0, hi=None):
     """Return the index where to insert item x in list a, assuming a is sorted in descending order.
```
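Note: `coalesce_ss_results` reports the geometric mean (`exp` of the mean `log`) as the headline estimate and keeps the arithmetic mean alongside it; unfinished children fall back to their `lowest_likelihood`. Quick check against the `easy_geometric` snapshot values recorded later in this compare:

```python
import numpy

likelihoods = numpy.array([0.1, 0.001])  # the two child under_target_likelihoods
geometric = numpy.exp(numpy.log(likelihoods).mean())
numpy.testing.assert_allclose(geometric, 0.01)            # snapshot: estimated_likelihood
numpy.testing.assert_allclose(likelihoods.mean(), 0.0505)  # snapshot: arithmetic mean
```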
poetry.lock (generated, 8 changed lines)
```diff
@@ -786,13 +786,13 @@ files = [

 [[package]]
 name = "pdme"
-version = "1.2.0"
+version = "1.5.0"
 description = "Python dipole model evaluator"
 optional = false
 python-versions = "<3.10,>=3.8.1"
 files = [
-    {file = "pdme-1.2.0-py3-none-any.whl", hash = "sha256:602710a053f22921b4adbc03d46d284149fe2367a65455cde56608708e01c84b"},
-    {file = "pdme-1.2.0.tar.gz", hash = "sha256:412806d7ae384c048515e0f2cba70252778bf153800829a1d3265a0596872263"},
+    {file = "pdme-1.5.0-py3-none-any.whl", hash = "sha256:1b4fa30ba98a336957b3029563552d73286a3a5f932809ac1330e65a1f61c363"},
+    {file = "pdme-1.5.0.tar.gz", hash = "sha256:cc0ac4ffab2994e08b4efde2991c6d9dccb2942c7e33c4be3b52e068366526d1"},
 ]

 [package.dependencies]
@@ -1275,4 +1275,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.1,<3.10"
-content-hash = "918b6736766a9c1b6732a56e1ef2e7a53241f2e25babb884881e49c299801fc9"
+content-hash = "85114054176aa164964acea6fdc085581ee7fc2f94c1cd03ad77611b82e52c79"
```
pyproject.toml

```diff
@@ -1,13 +1,13 @@
 [tool.poetry]
 name = "deepdog"
-version = "1.1.0"
+version = "1.3.0"
 description = ""
 authors = ["Deepak Mallubhotla <dmallubhotla+github@gmail.com>"]

 [tool.poetry.dependencies]
 python = ">=3.8.1,<3.10"
-pdme = "^1.2.0"
-numpy = "1.22.3"
+pdme = "^1.5.0"
+numpy = "1.26.4"
 scipy = "1.10"
 tqdm = "^4.66.2"
```
tests/direct_monte_carlo/test_cost_function_filter.py (new file, +42)
```diff
@@ -0,0 +1,42 @@
+import deepdog.direct_monte_carlo.cost_function_filter
+import numpy
+
+
+def test_px_cost_function_filter_example():
+
+    dipoles_1 = [
+        [1, 2, 3, 4, 5, 6, 7],
+        [2, 3, 2, 5, 4, 7, 6],
+    ]
+
+    dipoles_2 = [
+        [15, 9, 8, 7, 6, 5, 3],
+        [30, 4, 4, 7, 3, 1, 4],
+    ]
+
+    dipoleses = numpy.array([dipoles_1, dipoles_2])
+
+    def cost_function(dipoleses: numpy.ndarray) -> numpy.ndarray:
+        return dipoleses[:, :, 0].max(axis=-1)
+
+    expected_costs = numpy.array([2, 30])
+
+    numpy.testing.assert_array_equal(cost_function(dipoleses), expected_costs)
+
+    filter = deepdog.direct_monte_carlo.cost_function_filter.CostFunctionTargetFilter(
+        cost_function, 5
+    )
+
+    actual_filtered = filter.filter_samples(dipoleses)
+    expected_filtered = numpy.array([dipoles_1])
+    assert actual_filtered.size != 0
+    numpy.testing.assert_array_equal(actual_filtered, expected_filtered)
+
+    filter_stricter = (
+        deepdog.direct_monte_carlo.cost_function_filter.CostFunctionTargetFilter(
+            cost_function, 0.5
+        )
+    )
+
+    actual_filtered_stricter = filter_stricter.filter_samples(dipoleses)
+    assert actual_filtered_stricter.size == 0
```
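Note on the test's cost function: `dipoleses` has shape `(num_configurations, num_dipoles, 7)`, so `dipoleses[:, :, 0]` picks the first component of every dipole and `.max(axis=-1)` reduces over the dipoles, leaving one cost per configuration:

```python
import numpy

# Same data as the test above: two configurations of two 7-component dipoles.
dipoleses = numpy.array([
    [[1, 2, 3, 4, 5, 6, 7], [2, 3, 2, 5, 4, 7, 6]],
    [[15, 9, 8, 7, 6, 5, 3], [30, 4, 4, 7, 3, 1, 4]],
])
assert dipoleses.shape == (2, 2, 7)
first_components = dipoleses[:, :, 0]  # [[1, 2], [15, 30]]
assert (first_components.max(axis=-1) == [2, 30]).all()
```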
```diff
@@ -7,6 +7,7 @@ def test_parse_groupdict():
     )

     parsed = deepdog.results._parse_bayesrun_column(example_column_name)
+    assert parsed is not None
     expected = deepdog.results.BayesrunColumnParsed(
         {
             "xmin": "-20",
@@ -23,6 +24,30 @@ def test_parse_groupdict():
     assert parsed == expected


+def test_parse_groupdict_with_magnitude():
+    example_column_name = (
+        "geom_-20_20_-10_10_0_5-magnitude_3.5-orientation_free-dipole_count_100_success"
+    )
+
+    parsed = deepdog.results._parse_bayesrun_column(example_column_name)
+    assert parsed is not None
+    expected = deepdog.results.BayesrunColumnParsed(
+        {
+            "xmin": "-20",
+            "xmax": "20",
+            "ymin": "-10",
+            "ymax": "10",
+            "zmin": "0",
+            "zmax": "5",
+            "orientation": "free",
+            "avg_filled": "100",
+            "log_magnitude": "3.5",
+            "field_name": "success",
+        }
+    )
+    assert parsed == expected


 # def test_parse_no_match_column_name():
 #     parsed = deepdog.results.parse_bayesrun_column("There's nothing here")
 #     assert parsed is None
```
(new snapshot file, +10)

```diff
@@ -0,0 +1,10 @@
+# serializer version: 1
+# name: test_subset_simulation_multi_result_coalescing_easy_arithmetic
+  MultiSubsetSimulationResult(child_results=[SubsetSimulationResult(probs_list=(), over_target_cost=1, over_target_likelihood=1, under_target_cost=0.99, under_target_likelihood=0.8, lowest_likelihood=0.5, messages=[]), SubsetSimulationResult(probs_list=(), over_target_cost=1, over_target_likelihood=1, under_target_cost=0.99, under_target_likelihood=0.6, lowest_likelihood=0.01, messages=[])], model_name='test', estimated_likelihood=0.6928203230275509, arithmetic_mean_estimated_likelihood=0.7, num_children=2, num_finished_children=2, clean_estimate=True)
+# ---
+# name: test_subset_simulation_multi_result_coalescing_easy_geometric
+  MultiSubsetSimulationResult(child_results=[SubsetSimulationResult(probs_list=(), over_target_cost=1, over_target_likelihood=1, under_target_cost=0.99, under_target_likelihood=0.1, lowest_likelihood=0.5, messages=[]), SubsetSimulationResult(probs_list=(), over_target_cost=1, over_target_likelihood=1, under_target_cost=0.99, under_target_likelihood=0.001, lowest_likelihood=0.01, messages=[])], model_name='test', estimated_likelihood=0.010000000000000004, arithmetic_mean_estimated_likelihood=0.0505, num_children=2, num_finished_children=2, clean_estimate=True)
+# ---
+# name: test_subset_simulation_multi_result_coalescing_include_dirty
+  MultiSubsetSimulationResult(child_results=[SubsetSimulationResult(probs_list=(), over_target_cost=1, over_target_likelihood=1, under_target_cost=0.99, under_target_likelihood=0.8, lowest_likelihood=0.5, messages=[]), SubsetSimulationResult(probs_list=(), over_target_cost=1, over_target_likelihood=1, under_target_cost=0.99, under_target_likelihood=0.08, lowest_likelihood=0.01, messages=[]), SubsetSimulationResult(probs_list=(), over_target_cost=None, over_target_likelihood=None, under_target_cost=None, under_target_likelihood=None, lowest_likelihood=0.0001, messages=[])], model_name='test', estimated_likelihood=0.01856635533445112, arithmetic_mean_estimated_likelihood=0.29336666666666666, num_children=3, num_finished_children=2, clean_estimate=False)
+# ---
```
tests/subset_simulation/test_subset_simulation_coalescing.py (new file, +92)
```diff
@@ -0,0 +1,92 @@
+import deepdog.subset_simulation.subset_simulation_impl as impl
+import numpy
+
+
+def test_subset_simulation_multi_result_coalescing_include_dirty(snapshot):
+    res1 = impl.SubsetSimulationResult(
+        probs_list=(),
+        over_target_cost=1,
+        over_target_likelihood=1,
+        under_target_cost=0.99,
+        under_target_likelihood=0.8,
+        lowest_likelihood=0.5,
+        messages=[],
+    )
+
+    res2 = impl.SubsetSimulationResult(
+        probs_list=(),
+        over_target_cost=1,
+        over_target_likelihood=1,
+        under_target_cost=0.99,
+        under_target_likelihood=0.08,
+        lowest_likelihood=0.01,
+        messages=[],
+    )
+
+    res3 = impl.SubsetSimulationResult(
+        probs_list=(),
+        over_target_cost=None,
+        over_target_likelihood=None,
+        under_target_cost=None,
+        under_target_likelihood=None,
+        lowest_likelihood=0.0001,
+        messages=[],
+    )
+
+    combined = impl.coalesce_ss_results("test", [res1, res2, res3])
+
+    assert combined == snapshot
+
+
+def test_subset_simulation_multi_result_coalescing_easy_arithmetic(snapshot):
+    res1 = impl.SubsetSimulationResult(
+        probs_list=(),
+        over_target_cost=1,
+        over_target_likelihood=1,
+        under_target_cost=0.99,
+        under_target_likelihood=0.8,
+        lowest_likelihood=0.5,
+        messages=[],
+    )
+
+    res2 = impl.SubsetSimulationResult(
+        probs_list=(),
+        over_target_cost=1,
+        over_target_likelihood=1,
+        under_target_cost=0.99,
+        under_target_likelihood=0.6,
+        lowest_likelihood=0.01,
+        messages=[],
+    )
+
+    combined = impl.coalesce_ss_results("test", [res1, res2])
+
+    assert combined.arithmetic_mean_estimated_likelihood == 0.7
+    assert combined == snapshot
+
+
+def test_subset_simulation_multi_result_coalescing_easy_geometric(snapshot):
+    res1 = impl.SubsetSimulationResult(
+        probs_list=(),
+        over_target_cost=1,
+        over_target_likelihood=1,
+        under_target_cost=0.99,
+        under_target_likelihood=0.1,
+        lowest_likelihood=0.5,
+        messages=[],
+    )
+
+    res2 = impl.SubsetSimulationResult(
+        probs_list=(),
+        over_target_cost=1,
+        over_target_likelihood=1,
+        under_target_cost=0.99,
+        under_target_likelihood=0.001,
+        lowest_likelihood=0.01,
+        messages=[],
+    )
+
+    combined = impl.coalesce_ss_results("test", [res1, res2])
+
+    numpy.testing.assert_allclose(combined.estimated_likelihood, 0.01)
+    assert combined == snapshot
```