Compare commits
5 Commits
Author | SHA1 | Date
---|---|---
 | 310977e9b8 |
 | b10586bf55 |
 | 1741807be4 |
 | 9a4548def4 |
 | b4e5f53726 |
CHANGELOG.md

@@ -2,6 +2,15 @@
 All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
 
+### [0.7.5](https://gitea.deepak.science:2222/physics/deepdog/compare/0.7.4...0.7.5) (2023-12-09)
+
+
+### Features
+
+* adds direct monte carlo package ([1741807](https://gitea.deepak.science:2222/physics/deepdog/commit/1741807be43d08fb51bc94518dd3b67585c04c20))
+* adds longchain logging if logging last generation ([b4e5f53](https://gitea.deepak.science:2222/physics/deepdog/commit/b4e5f5372682fc64c3734a96c4a899e018f127ce))
+* allows disabling timestamp in subset simulation bayes results ([9a4548d](https://gitea.deepak.science:2222/physics/deepdog/commit/9a4548def45a01f1f518135d4237c3dc09dcc342))
+
 ### [0.7.4](https://gitea.deepak.science:2222/physics/deepdog/compare/0.7.3...0.7.4) (2023-07-27)
@@ -73,6 +73,7 @@ class BayesRunWithSubspaceSimulation:
         ss_dump_last_generation=False,
         ss_initial_costs_chunk_size=100,
         write_output_to_bayesruncsv=True,
+        use_timestamp_for_output=True,
     ) -> None:
         self.dot_inputs = pdme.inputs.inputs_with_frequency_range(
             dot_positions, frequency_range
@@ -110,8 +111,11 @@ class BayesRunWithSubspaceSimulation:
         self.probabilities = [1 / self.model_count] * self.model_count
 
-        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
-        self.filename = f"{timestamp}-{filename_slug}.bayesrunwithss.csv"
+        if use_timestamp_for_output:
+            timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
+            self.filename = f"{timestamp}-{filename_slug}.bayesrunwithss.csv"
+        else:
+            self.filename = f"{filename_slug}.bayesrunwithss.csv"
         self.max_frequency = max_frequency
 
         if end_threshold is not None:
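The new `use_timestamp_for_output` flag only affects how the output CSV is named. As a minimal, self-contained sketch of that naming behaviour (the `filename_slug` value below is an illustrative placeholder, not taken from the repository):

```python
import datetime

# Illustrative placeholder; in the class this comes from the constructor's filename_slug argument.
filename_slug = "example-run"
use_timestamp_for_output = False

if use_timestamp_for_output:
    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    filename = f"{timestamp}-{filename_slug}.bayesrunwithss.csv"
else:
    # Stable, timestamp-free name, handy for reproducible snapshot tests.
    filename = f"{filename_slug}.bayesrunwithss.csv"

print(filename)  # example-run.bayesrunwithss.csv
```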
deepdog/direct_monte_carlo/__init__.py (new file, 6 lines)

@@ -0,0 +1,6 @@
+from deepdog.direct_monte_carlo.direct_mc import (
+    DirectMonteCarloRun,
+    DirectMonteCarloConfig,
+)
+
+__all__ = ["DirectMonteCarloRun", "DirectMonteCarloConfig"]
deepdog/direct_monte_carlo/direct_mc.py (new file, 157 lines)

@@ -0,0 +1,157 @@
+import pdme.model
+import pdme.measurement
+import pdme.measurement.input_types
+import pdme.subspace_simulation
+from typing import Tuple, Sequence
+from dataclasses import dataclass
+import logging
+import numpy
+import numpy.random
+import pdme.util.fast_v_calc
+
+_logger = logging.getLogger(__name__)
+
+
+@dataclass
+class DirectMonteCarloResult:
+    successes: int
+    monte_carlo_count: int
+    likelihood: float
+
+
+@dataclass
+class DirectMonteCarloConfig:
+    monte_carlo_count_per_cycle: int = 10000
+    monte_carlo_cycles: int = 10
+    target_success: int = 100
+    max_monte_carlo_cycles_steps: int = 10
+    monte_carlo_seed: int = 1234
+    write_successes_to_file: bool = False
+    tag: str = ""
+
+
+class DirectMonteCarloRun:
+    """
+    A single-model Direct Monte Carlo run, currently implemented only using single threading.
+    An encapsulation of the steps needed for a Bayes run.
+
+    Parameters
+    ----------
+    model_name_pair : Tuple[str, pdme.model.DipoleModel]
+        The model to evaluate, with name.
+
+    measurements : Sequence[pdme.measurement.DotRangeMeasurement]
+        The measurements, as dot ranges, to use as the bounds for the Monte Carlo calculation.
+
+    monte_carlo_count_per_cycle : int
+        The number of Monte Carlo iterations to use in a single cycle calculation.
+
+    monte_carlo_cycles : int
+        The number of cycles to use in each step.
+        Increasing monte_carlo_count_per_cycle increases memory usage (and runtime), while this increases runtime only,
+        allowing control over memory use.
+
+    target_success : int
+        The number of successes to target before exiting early.
+        Should likely be ~100, but can go higher too.
+
+    max_monte_carlo_cycles_steps : int
+        The number of steps to use. Each step consists of monte_carlo_cycles cycles, each of which has monte_carlo_count_per_cycle iterations.
+
+    monte_carlo_seed : int
+        The seed to use for the RNG.
+    """
+
+    def __init__(
+        self,
+        model_name_pair: Tuple[str, pdme.model.DipoleModel],
+        measurements: Sequence[pdme.measurement.DotRangeMeasurement],
+        config: DirectMonteCarloConfig,
+    ):
+        self.model_name, self.model = model_name_pair
+
+        self.measurements = measurements
+        self.dot_inputs = [(measure.r, measure.f) for measure in self.measurements]
+
+        self.dot_inputs_array = pdme.measurement.input_types.dot_inputs_to_array(
+            self.dot_inputs
+        )
+
+        self.config = config
+        (
+            self.lows,
+            self.highs,
+        ) = pdme.measurement.input_types.dot_range_measurements_low_high_arrays(
+            self.measurements
+        )
+
+    def _single_run(self, seed) -> numpy.ndarray:
+        rng = numpy.random.default_rng(seed)
+
+        sample_dipoles = self.model.get_monte_carlo_dipole_inputs(
+            self.config.monte_carlo_count_per_cycle, -1, rng
+        )
+
+        current_sample = sample_dipoles
+        for di, low, high in zip(self.dot_inputs_array, self.lows, self.highs):
+
+            if len(current_sample) < 1:
+                break
+            vals = pdme.util.fast_v_calc.fast_vs_for_dipoleses(
+                numpy.array([di]), current_sample
+            )
+
+            current_sample = current_sample[
+                numpy.all((vals > low) & (vals < high), axis=1)
+            ]
+        return current_sample
+
+    def execute(self) -> DirectMonteCarloResult:
+        step_count = 0
+        total_success = 0
+        total_count = 0
+
+        count_per_step = (
+            self.config.monte_carlo_count_per_cycle * self.config.monte_carlo_cycles
+        )
+        seed_sequence = numpy.random.SeedSequence(self.config.monte_carlo_seed)
+        while (step_count < self.config.max_monte_carlo_cycles_steps) and (
+            total_success < self.config.target_success
+        ):
+            _logger.debug(f"Executing step {step_count}")
+            for cycle_i, seed in enumerate(
+                seed_sequence.spawn(self.config.monte_carlo_cycles)
+            ):
+                cycle_success_configs = self._single_run(seed)
+                cycle_success_count = len(cycle_success_configs)
+                if cycle_success_count > 0:
+                    _logger.debug(
+                        f"For cycle {cycle_i} received {cycle_success_count} successes"
+                    )
+                    _logger.debug(cycle_success_configs)
+                    if self.config.write_successes_to_file:
+                        sorted_by_freq = numpy.array(
+                            [
+                                pdme.subspace_simulation.sort_array_of_dipoles_by_frequency(
+                                    dipole_config
+                                )
+                                for dipole_config in cycle_success_configs
+                            ]
+                        )
+                        dipole_count = numpy.array(cycle_success_configs).shape[1]
+                        for n in range(dipole_count):
+                            numpy.savetxt(
+                                f"{self.config.tag}_{step_count}_{cycle_i}_dipole_{n}.csv",
+                                sorted_by_freq[:, n],
+                                delimiter=",",
+                            )
+                total_success += cycle_success_count
+            _logger.debug(f"At end of step {step_count} have {total_success} successes")
+            step_count += 1
+            total_count += count_per_step
+
+        return DirectMonteCarloResult(
+            successes=total_success,
+            monte_carlo_count=total_count,
+            likelihood=total_success / total_count,
+        )
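To orient readers of the new module, a hedged usage sketch follows. Only `DirectMonteCarloConfig`, `DirectMonteCarloRun`, and `DirectMonteCarloResult` come from the file above; the `run_direct_mc` helper and its `model` / `measurements` arguments are illustrative placeholders the caller is assumed to supply (a `pdme.model.DipoleModel` and a sequence of `pdme.measurement.DotRangeMeasurement`).

```python
from typing import Sequence

import pdme.measurement
import pdme.model

from deepdog.direct_monte_carlo import DirectMonteCarloConfig, DirectMonteCarloRun
from deepdog.direct_monte_carlo.direct_mc import DirectMonteCarloResult

# Values mirror the dataclass defaults above; override what matters for the run at hand.
config = DirectMonteCarloConfig(
    monte_carlo_count_per_cycle=10000,  # samples per cycle (drives memory use)
    monte_carlo_cycles=10,              # cycles per step (drives runtime)
    target_success=100,                 # stop early once this many successes accumulate
    max_monte_carlo_cycles_steps=10,    # hard cap on the number of steps
    monte_carlo_seed=1234,
)


def run_direct_mc(
    name: str,
    model: pdme.model.DipoleModel,
    measurements: Sequence[pdme.measurement.DotRangeMeasurement],
) -> DirectMonteCarloResult:
    # Bundle the named model, the measurement bounds, and the config, then execute.
    run = DirectMonteCarloRun((name, model), measurements, config)
    return run.execute()
```

The returned `DirectMonteCarloResult` reports the raw success and sample counts along with `likelihood`, which `execute()` computes as successes divided by the total number of sampled configurations.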
@@ -101,11 +101,17 @@ class SubsetSimulation:
             # _logger.debug(sample_dipoles.shape)
 
             raw_costs = []
-            _logger.debug(f"Using iterated cost function thing with chunk size {self.initial_cost_chunk_size}")
+            _logger.debug(
+                f"Using iterated cost function thing with chunk size {self.initial_cost_chunk_size}"
+            )
+
             for x in range(0, len(sample_dipoles), self.initial_cost_chunk_size):
                 _logger.debug(f"doing chunk {x}")
-                raw_costs.extend(self.cost_function_to_use(sample_dipoles[x: x + self.initial_cost_chunk_size]))
+                raw_costs.extend(
+                    self.cost_function_to_use(
+                        sample_dipoles[x : x + self.initial_cost_chunk_size]
+                    )
+                )
             costs = numpy.array(raw_costs)
 
             _logger.debug(f"costs: {costs}")
@@ -143,6 +149,37 @@ class SubsetSimulation:
                        delimiter=",",
                    )
+
+                next_seeds_as_array = numpy.array([s for _, s in next_seeds])
+                stdevs = self.get_stdevs_from_arrays(next_seeds_as_array)
+                _logger.info(f"got stdevs: {stdevs.stdevs}")
+                all_long_chains = []
+                for seed_index, (c, s) in enumerate(
+                    next_seeds[:: len(next_seeds) // 20]
+                ):
+                    # chain = mcmc(s, threshold_cost, n_s, model, dot_inputs_array, actual_measurement_array, mcmc_rng, curr_cost=c, stdevs=stdevs)
+                    # until new version gotta do
+                    _logger.debug(f"\t{seed_index}: doing long chain on the next seed")
+
+                    long_chain = self.model.get_mcmc_chain(
+                        s,
+                        self.cost_function_to_use,
+                        1000,
+                        threshold_cost,
+                        stdevs,
+                        initial_cost=c,
+                        rng_arg=mcmc_rng,
+                    )
+                    for _, chained in long_chain:
+                        all_long_chains.append(chained)
+                all_long_chains_array = numpy.array(all_long_chains)
+                for n in range(self.model.n):
+                    _logger.info(f"{all_long_chains_array[:, n].shape}")
+                    numpy.savetxt(
+                        f"long_chain_generation_{self.n_c}_{self.n_s}_{i}_dipole_{n}.csv",
+                        all_long_chains_array[:, n],
+                        delimiter=",",
+                    )
 
         if self.keep_probs_list:
             for cost_index, cost_chain in enumerate(all_chains[: -self.n_c]):
                 probs_list.append(
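The chunked evaluation in the first SubsetSimulation hunk above trades a few extra calls for a bounded peak memory footprint, by handing the cost function at most `initial_cost_chunk_size` samples at a time. A generic, self-contained sketch of the same pattern (the `chunked_costs` and `toy_cost` names are illustrative, not part of deepdog):

```python
import numpy


def chunked_costs(cost_function, samples, chunk_size):
    # Evaluate cost_function on fixed-size slices so only chunk_size samples
    # are in flight at once, then stitch the per-chunk results back together.
    raw_costs = []
    for x in range(0, len(samples), chunk_size):
        raw_costs.extend(cost_function(samples[x : x + chunk_size]))
    return numpy.array(raw_costs)


def toy_cost(chunk):
    # Stand-in cost: squared magnitude of each sample.
    return numpy.square(chunk)


samples = numpy.linspace(0.0, 1.0, 10)
print(chunked_costs(toy_cost, samples, chunk_size=3))
```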
pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "deepdog"
-version = "0.7.4"
+version = "0.7.5"
 description = ""
 authors = ["Deepak Mallubhotla <dmallubhotla+github@gmail.com>"]
@@ -151,7 +151,7 @@ def test_bayesss_with_tighter_cost(snapshot):
         ss_default_upper_w_log_step=4,
         ss_dump_last_generation=False,
         write_output_to_bayesruncsv=False,
-        ss_initial_costs_chunk_size=1
+        ss_initial_costs_chunk_size=1,
     )
     result = square_run.go()