Compare commits
5 Commits
Author | SHA1 | Date
---|---|---
 | 310977e9b8 |
 | b10586bf55 |
 | 1741807be4 |
 | 9a4548def4 |
 | b4e5f53726 |
CHANGELOG.md

@@ -2,6 +2,15 @@
 All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
 
+### [0.7.5](https://gitea.deepak.science:2222/physics/deepdog/compare/0.7.4...0.7.5) (2023-12-09)
+
+
+### Features
+
+* adds direct monte carlo package ([1741807](https://gitea.deepak.science:2222/physics/deepdog/commit/1741807be43d08fb51bc94518dd3b67585c04c20))
+* adds longchain logging if logging last generation ([b4e5f53](https://gitea.deepak.science:2222/physics/deepdog/commit/b4e5f5372682fc64c3734a96c4a899e018f127ce))
+* allows disabling timestamp in subset simulation bayes results ([9a4548d](https://gitea.deepak.science:2222/physics/deepdog/commit/9a4548def45a01f1f518135d4237c3dc09dcc342))
+
 ### [0.7.4](https://gitea.deepak.science:2222/physics/deepdog/compare/0.7.3...0.7.4) (2023-07-27)
@@ -73,6 +73,7 @@ class BayesRunWithSubspaceSimulation:
         ss_dump_last_generation=False,
         ss_initial_costs_chunk_size=100,
         write_output_to_bayesruncsv=True,
+        use_timestamp_for_output=True,
     ) -> None:
         self.dot_inputs = pdme.inputs.inputs_with_frequency_range(
             dot_positions, frequency_range
@@ -110,8 +111,11 @@ class BayesRunWithSubspaceSimulation:
         self.probabilities = [1 / self.model_count] * self.model_count
 
-        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
-        self.filename = f"{timestamp}-{filename_slug}.bayesrunwithss.csv"
+        if use_timestamp_for_output:
+            timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
+            self.filename = f"{timestamp}-{filename_slug}.bayesrunwithss.csv"
+        else:
+            self.filename = f"{filename_slug}.bayesrunwithss.csv"
         self.max_frequency = max_frequency
 
         if end_threshold is not None:
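The new `use_timestamp_for_output` flag only affects how the output CSV is named. As a minimal, self-contained sketch of that naming behaviour (the `filename_slug` value below is an illustrative placeholder, not taken from the repository):

```python
import datetime

# Illustrative placeholder; in the class this comes from the constructor's filename_slug argument.
filename_slug = "example-run"
use_timestamp_for_output = False

if use_timestamp_for_output:
    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    filename = f"{timestamp}-{filename_slug}.bayesrunwithss.csv"
else:
    # Stable, timestamp-free name, handy for reproducible snapshot tests.
    filename = f"{filename_slug}.bayesrunwithss.csv"

print(filename)  # example-run.bayesrunwithss.csv
```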
deepdog/direct_monte_carlo/__init__.py (new file, 6 lines)

@@ -0,0 +1,6 @@
+from deepdog.direct_monte_carlo.direct_mc import (
+    DirectMonteCarloRun,
+    DirectMonteCarloConfig,
+)
+
+__all__ = ["DirectMonteCarloRun", "DirectMonteCarloConfig"]
deepdog/direct_monte_carlo/direct_mc.py (new file, 157 lines)

@@ -0,0 +1,157 @@
+import pdme.model
+import pdme.measurement
+import pdme.measurement.input_types
+import pdme.subspace_simulation
+from typing import Tuple, Sequence
+from dataclasses import dataclass
+import logging
+import numpy
+import numpy.random
+import pdme.util.fast_v_calc
+
+_logger = logging.getLogger(__name__)
+
+
+@dataclass
+class DirectMonteCarloResult:
+    successes: int
+    monte_carlo_count: int
+    likelihood: float
+
+
+@dataclass
+class DirectMonteCarloConfig:
+    monte_carlo_count_per_cycle: int = 10000
+    monte_carlo_cycles: int = 10
+    target_success: int = 100
+    max_monte_carlo_cycles_steps: int = 10
+    monte_carlo_seed: int = 1234
+    write_successes_to_file: bool = False
+    tag: str = ""
+
+
+class DirectMonteCarloRun:
+    """
+    A single-model Direct Monte Carlo run, currently implemented only using single threading.
+    An encapsulation of the steps needed for a Bayes run.
+
+    Parameters
+    ----------
+    model_name_pair : Tuple[str, pdme.model.DipoleModel]
+        The model to evaluate, with name.
+
+    measurements : Sequence[pdme.measurement.DotRangeMeasurement]
+        The measurements, as dot ranges, to use as the bounds for the Monte Carlo calculation.
+
+    monte_carlo_count_per_cycle : int
+        The number of Monte Carlo iterations to use in a single cycle calculation.
+
+    monte_carlo_cycles : int
+        The number of cycles to use in each step.
+        Increasing monte_carlo_count_per_cycle increases memory usage (and runtime), while this increases runtime only,
+        allowing control over memory use.
+
+    target_success : int
+        The number of successes to target before exiting early.
+        Should likely be ~100, but can go higher too.
+
+    max_monte_carlo_cycles_steps : int
+        The number of steps to use. Each step consists of monte_carlo_cycles cycles, each of which has monte_carlo_count_per_cycle iterations.
+
+    monte_carlo_seed : int
+        The seed to use for the RNG.
+    """
+
+    def __init__(
+        self,
+        model_name_pair: Tuple[str, pdme.model.DipoleModel],
+        measurements: Sequence[pdme.measurement.DotRangeMeasurement],
+        config: DirectMonteCarloConfig,
+    ):
+        self.model_name, self.model = model_name_pair
+
+        self.measurements = measurements
+        self.dot_inputs = [(measure.r, measure.f) for measure in self.measurements]
+
+        self.dot_inputs_array = pdme.measurement.input_types.dot_inputs_to_array(
+            self.dot_inputs
+        )
+
+        self.config = config
+        (
+            self.lows,
+            self.highs,
+        ) = pdme.measurement.input_types.dot_range_measurements_low_high_arrays(
+            self.measurements
+        )
+
+    def _single_run(self, seed) -> numpy.ndarray:
+        rng = numpy.random.default_rng(seed)
+
+        sample_dipoles = self.model.get_monte_carlo_dipole_inputs(
+            self.config.monte_carlo_count_per_cycle, -1, rng
+        )
+
+        current_sample = sample_dipoles
+        for di, low, high in zip(self.dot_inputs_array, self.lows, self.highs):
+
+            if len(current_sample) < 1:
+                break
+            vals = pdme.util.fast_v_calc.fast_vs_for_dipoleses(
+                numpy.array([di]), current_sample
+            )
+
+            current_sample = current_sample[
+                numpy.all((vals > low) & (vals < high), axis=1)
+            ]
+        return current_sample
+
+    def execute(self) -> DirectMonteCarloResult:
+        step_count = 0
+        total_success = 0
+        total_count = 0
+
+        count_per_step = (
+            self.config.monte_carlo_count_per_cycle * self.config.monte_carlo_cycles
+        )
+        seed_sequence = numpy.random.SeedSequence(self.config.monte_carlo_seed)
+        while (step_count < self.config.max_monte_carlo_cycles_steps) and (
+            total_success < self.config.target_success
+        ):
+            _logger.debug(f"Executing step {step_count}")
+            for cycle_i, seed in enumerate(
+                seed_sequence.spawn(self.config.monte_carlo_cycles)
+            ):
+                cycle_success_configs = self._single_run(seed)
+                cycle_success_count = len(cycle_success_configs)
+                if cycle_success_count > 0:
+                    _logger.debug(
+                        f"For cycle {cycle_i} received {cycle_success_count} successes"
+                    )
+                    _logger.debug(cycle_success_configs)
+                    if self.config.write_successes_to_file:
+                        sorted_by_freq = numpy.array(
+                            [
+                                pdme.subspace_simulation.sort_array_of_dipoles_by_frequency(
+                                    dipole_config
+                                )
+                                for dipole_config in cycle_success_configs
+                            ]
+                        )
+                        dipole_count = numpy.array(cycle_success_configs).shape[1]
+                        for n in range(dipole_count):
+                            numpy.savetxt(
+                                f"{self.config.tag}_{step_count}_{cycle_i}_dipole_{n}.csv",
+                                sorted_by_freq[:, n],
+                                delimiter=",",
+                            )
+                total_success += cycle_success_count
+            _logger.debug(f"At end of step {step_count} have {total_success} successes")
+            step_count += 1
+            total_count += count_per_step
+
+        return DirectMonteCarloResult(
+            successes=total_success,
+            monte_carlo_count=total_count,
+            likelihood=total_success / total_count,
+        )
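To orient readers of the new module, a hedged usage sketch follows. Only `DirectMonteCarloConfig`, `DirectMonteCarloRun`, and `DirectMonteCarloResult` come from the file above; the `run_direct_mc` helper and its `model` / `measurements` arguments are illustrative placeholders the caller is assumed to supply (a `pdme.model.DipoleModel` and a sequence of `pdme.measurement.DotRangeMeasurement`).

```python
from typing import Sequence

import pdme.measurement
import pdme.model

from deepdog.direct_monte_carlo import DirectMonteCarloConfig, DirectMonteCarloRun
from deepdog.direct_monte_carlo.direct_mc import DirectMonteCarloResult

# Values mirror the dataclass defaults above; override what matters for the run at hand.
config = DirectMonteCarloConfig(
    monte_carlo_count_per_cycle=10000,  # samples per cycle (drives memory use)
    monte_carlo_cycles=10,              # cycles per step (drives runtime)
    target_success=100,                 # stop early once this many successes accumulate
    max_monte_carlo_cycles_steps=10,    # hard cap on the number of steps
    monte_carlo_seed=1234,
)


def run_direct_mc(
    name: str,
    model: pdme.model.DipoleModel,
    measurements: Sequence[pdme.measurement.DotRangeMeasurement],
) -> DirectMonteCarloResult:
    # Bundle the named model, the measurement bounds, and the config, then execute.
    run = DirectMonteCarloRun((name, model), measurements, config)
    return run.execute()
```

The returned `DirectMonteCarloResult` reports the raw success and sample counts along with `likelihood`, which `execute()` computes as successes divided by the total number of sampled configurations.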
@@ -101,11 +101,17 @@ class SubsetSimulation:
             # _logger.debug(sample_dipoles.shape)
 
             raw_costs = []
-            _logger.debug(f"Using iterated cost function thing with chunk size {self.initial_cost_chunk_size}")
+            _logger.debug(
+                f"Using iterated cost function thing with chunk size {self.initial_cost_chunk_size}"
+            )
+
             for x in range(0, len(sample_dipoles), self.initial_cost_chunk_size):
                 _logger.debug(f"doing chunk {x}")
-                raw_costs.extend(self.cost_function_to_use(sample_dipoles[x: x + self.initial_cost_chunk_size]))
+                raw_costs.extend(
+                    self.cost_function_to_use(
+                        sample_dipoles[x : x + self.initial_cost_chunk_size]
+                    )
+                )
             costs = numpy.array(raw_costs)
 
             _logger.debug(f"costs: {costs}")
@@ -143,6 +149,37 @@ class SubsetSimulation:
                        delimiter=",",
                    )
+
+                next_seeds_as_array = numpy.array([s for _, s in next_seeds])
+                stdevs = self.get_stdevs_from_arrays(next_seeds_as_array)
+                _logger.info(f"got stdevs: {stdevs.stdevs}")
+                all_long_chains = []
+                for seed_index, (c, s) in enumerate(
+                    next_seeds[:: len(next_seeds) // 20]
+                ):
+                    # chain = mcmc(s, threshold_cost, n_s, model, dot_inputs_array, actual_measurement_array, mcmc_rng, curr_cost=c, stdevs=stdevs)
+                    # until new version gotta do
+                    _logger.debug(f"\t{seed_index}: doing long chain on the next seed")
+
+                    long_chain = self.model.get_mcmc_chain(
+                        s,
+                        self.cost_function_to_use,
+                        1000,
+                        threshold_cost,
+                        stdevs,
+                        initial_cost=c,
+                        rng_arg=mcmc_rng,
+                    )
+                    for _, chained in long_chain:
+                        all_long_chains.append(chained)
+                all_long_chains_array = numpy.array(all_long_chains)
+                for n in range(self.model.n):
+                    _logger.info(f"{all_long_chains_array[:, n].shape}")
+                    numpy.savetxt(
+                        f"long_chain_generation_{self.n_c}_{self.n_s}_{i}_dipole_{n}.csv",
+                        all_long_chains_array[:, n],
+                        delimiter=",",
+                    )
 
         if self.keep_probs_list:
             for cost_index, cost_chain in enumerate(all_chains[: -self.n_c]):
                 probs_list.append(
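The chunked evaluation in the first SubsetSimulation hunk above trades a few extra calls for a bounded peak memory footprint, by handing the cost function at most `initial_cost_chunk_size` samples at a time. A generic, self-contained sketch of the same pattern (the `chunked_costs` and `toy_cost` names are illustrative, not part of deepdog):

```python
import numpy


def chunked_costs(cost_function, samples, chunk_size):
    # Evaluate cost_function on fixed-size slices so only chunk_size samples
    # are in flight at once, then stitch the per-chunk results back together.
    raw_costs = []
    for x in range(0, len(samples), chunk_size):
        raw_costs.extend(cost_function(samples[x : x + chunk_size]))
    return numpy.array(raw_costs)


def toy_cost(chunk):
    # Stand-in cost: squared magnitude of each sample.
    return numpy.square(chunk)


samples = numpy.linspace(0.0, 1.0, 10)
print(chunked_costs(toy_cost, samples, chunk_size=3))
```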
pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "deepdog"
-version = "0.7.4"
+version = "0.7.5"
 description = ""
 authors = ["Deepak Mallubhotla <dmallubhotla+github@gmail.com>"]
@@ -151,7 +151,7 @@ def test_bayesss_with_tighter_cost(snapshot):
         ss_default_upper_w_log_step=4,
         ss_dump_last_generation=False,
         write_output_to_bayesruncsv=False,
-        ss_initial_costs_chunk_size=1
+        ss_initial_costs_chunk_size=1,
     )
     result = square_run.go()