chore(deps): update dependency mypy to v1

chore(release): 0.7.5
fmt: auto format changes
2023-12-22 01:30:58 +00:00 · 2023-12-09 16:27:30 -06:00 · 2023-12-09 16:25:57 -06:00 · 2023-12-09 16:24:20 -06:00 · 2023-12-09 16:23:45 -06:00
7 changed files with 193 additions and 13 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,15 @@

 All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.

+### [0.7.5](https://gitea.deepak.science:2222/physics/deepdog/compare/0.7.4...0.7.5) (2023-12-09)
+
+
+### Features
+
+* adds direct monte carlo package ([1741807](https://gitea.deepak.science:2222/physics/deepdog/commit/1741807be43d08fb51bc94518dd3b67585c04c20))
+* adds longchain logging if logging last generation ([b4e5f53](https://gitea.deepak.science:2222/physics/deepdog/commit/b4e5f5372682fc64c3734a96c4a899e018f127ce))
+* allows disabling timestamp in subset simulation bayes results ([9a4548d](https://gitea.deepak.science:2222/physics/deepdog/commit/9a4548def45a01f1f518135d4237c3dc09dcc342))
+
 ### [0.7.4](https://gitea.deepak.science:2222/physics/deepdog/compare/0.7.3...0.7.4) (2023-07-27)


--- a/deepdog/bayes_run_with_ss.py
+++ b/deepdog/bayes_run_with_ss.py
@@ -73,6 +73,7 @@ class BayesRunWithSubspaceSimulation:
 		ss_dump_last_generation=False,
 		ss_initial_costs_chunk_size=100,
 		write_output_to_bayesruncsv=True,
+		use_timestamp_for_output=True,
 	) -> None:
 		self.dot_inputs = pdme.inputs.inputs_with_frequency_range(
 			dot_positions, frequency_range
@@ -110,8 +111,11 @@ class BayesRunWithSubspaceSimulation:

 		self.probabilities = [1 / self.model_count] * self.model_count

-		timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
-		self.filename = f"{timestamp}-{filename_slug}.bayesrunwithss.csv"
+		if use_timestamp_for_output:
+			timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
+			self.filename = f"{timestamp}-{filename_slug}.bayesrunwithss.csv"
+		else:
+			self.filename = f"{filename_slug}.bayesrunwithss.csv"
 		self.max_frequency = max_frequency

 		if end_threshold is not None:
--- a/deepdog/direct_monte_carlo/__init__.py
+++ b/deepdog/direct_monte_carlo/__init__.py
@@ -0,0 +1,6 @@
+from deepdog.direct_monte_carlo.direct_mc import (
+	DirectMonteCarloRun,
+	DirectMonteCarloConfig,
+)
+
+__all__ = ["DirectMonteCarloRun", "DirectMonteCarloConfig"]
--- a/deepdog/direct_monte_carlo/direct_mc.py
+++ b/deepdog/direct_monte_carlo/direct_mc.py
@@ -0,0 +1,157 @@
+import pdme.model
+import pdme.measurement
+import pdme.measurement.input_types
+import pdme.subspace_simulation
+from typing import Tuple, Sequence
+from dataclasses import dataclass
+import logging
+import numpy
+import numpy.random
+import pdme.util.fast_v_calc
+
+_logger = logging.getLogger(__name__)
+
+
+@dataclass
+class DirectMonteCarloResult:
+	successes: int
+	monte_carlo_count: int
+	likelihood: float
+
+
+@dataclass
+class DirectMonteCarloConfig:
+	monte_carlo_count_per_cycle: int = 10000
+	monte_carlo_cycles: int = 10
+	target_success: int = 100
+	max_monte_carlo_cycles_steps: int = 10
+	monte_carlo_seed: int = 1234
+	write_successes_to_file: bool = False
+	tag: str = ""
+
+
+class DirectMonteCarloRun:
+	"""
+	A single model Direct Monte Carlo run, currently implemented only using single threading.
+	An encapsulation of the steps needed for a Bayes run.
+
+	Parameters
+	----------
+	model_name_pair : Tuple[str, pdme.model.DipoleModel]
+	The model to evaluate, paired with its name as (name, model).
+
+	measurements: Sequence[pdme.measurement.DotRangeMeasurement]
+	The measurements as dot ranges to use as the bounds for the Monte Carlo calculation.
+
+	monte_carlo_count_per_cycle: int
+	The number of Monte Carlo iterations to use in a single cycle calculation.
+
+	monte_carlo_cycles: int
+	The number of cycles to use in each step.
+	Increasing monte_carlo_count_per_cycle increases memory usage (and runtime), while increasing
+	monte_carlo_cycles increases only runtime, so the two together allow control over memory use.
+
+	target_success: int
+	The number of successes to target before exiting early.
+	Should likely be ~100 but can go higher too.
+
+	max_monte_carlo_cycles_steps: int
+	The maximum number of steps to run; the run stops early once target_success is reached. Each step consists of monte_carlo_cycles cycles, each of which has monte_carlo_count_per_cycle iterations.
+
+	monte_carlo_seed: int
+	The seed to use for the RNG.
+	"""
+
+	def __init__(
+		self,
+		model_name_pair: Tuple[str, pdme.model.DipoleModel],
+		measurements: Sequence[pdme.measurement.DotRangeMeasurement],
+		config: DirectMonteCarloConfig,
+	):
+		self.model_name, self.model = model_name_pair
+
+		self.measurements = measurements
+		self.dot_inputs = [(measure.r, measure.f) for measure in self.measurements]
+
+		self.dot_inputs_array = pdme.measurement.input_types.dot_inputs_to_array(
+			self.dot_inputs
+		)
+
+		self.config = config
+		(
+			self.lows,
+			self.highs,
+		) = pdme.measurement.input_types.dot_range_measurements_low_high_arrays(
+			self.measurements
+		)
+
+	def _single_run(self, seed) -> numpy.ndarray:
+		rng = numpy.random.default_rng(seed)
+
+		sample_dipoles = self.model.get_monte_carlo_dipole_inputs(
+			self.config.monte_carlo_count_per_cycle, -1, rng
+		)
+
+		current_sample = sample_dipoles
+		for di, low, high in zip(self.dot_inputs_array, self.lows, self.highs):
+
+			if len(current_sample) < 1:
+				break
+			vals = pdme.util.fast_v_calc.fast_vs_for_dipoleses(
+				numpy.array([di]), current_sample
+			)
+
+			current_sample = current_sample[
+				numpy.all((vals > low) & (vals < high), axis=1)
+			]
+		return current_sample
+
+	def execute(self) -> DirectMonteCarloResult:
+		step_count = 0
+		total_success = 0
+		total_count = 0
+
+		count_per_step = (
+			self.config.monte_carlo_count_per_cycle * self.config.monte_carlo_cycles
+		)
+		seed_sequence = numpy.random.SeedSequence(self.config.monte_carlo_seed)
+		while (step_count < self.config.max_monte_carlo_cycles_steps) and (
+			total_success < self.config.target_success
+		):
+			_logger.debug(f"Executing step {step_count}")
+			for cycle_i, seed in enumerate(
+				seed_sequence.spawn(self.config.monte_carlo_cycles)
+			):
+				cycle_success_configs = self._single_run(seed)
+				cycle_success_count = len(cycle_success_configs)
+				if cycle_success_count > 0:
+					_logger.debug(
+						f"For cycle {cycle_i} received {cycle_success_count} successes"
+					)
+					_logger.debug(cycle_success_configs)
+					if self.config.write_successes_to_file:
+						sorted_by_freq = numpy.array(
+							[
+								pdme.subspace_simulation.sort_array_of_dipoles_by_frequency(
+									dipole_config
+								)
+								for dipole_config in cycle_success_configs
+							]
+						)
+						dipole_count = numpy.array(cycle_success_configs).shape[1]
+						for n in range(dipole_count):
+							numpy.savetxt(
+								f"{self.config.tag}_{step_count}_{cycle_i}_dipole_{n}.csv",
+								sorted_by_freq[:, n],
+								delimiter=",",
+							)
+				total_success += cycle_success_count
+			_logger.debug(f"At end of step {step_count} have {total_success} successes")
+			step_count += 1
+			total_count += count_per_step
+
+		return DirectMonteCarloResult(
+			successes=total_success,
+			monte_carlo_count=total_count,
+			likelihood=total_success / total_count,
+		)
--- a/deepdog/subset_simulation/subset_simulation_impl.py
+++ b/deepdog/subset_simulation/subset_simulation_impl.py
@@ -101,11 +101,17 @@ class SubsetSimulation:
 		# _logger.debug(sample_dipoles.shape)

 		raw_costs = []
-		_logger.debug(f"Using iterated cost function thing with chunk size {self.initial_cost_chunk_size}")
+		_logger.debug(
+			f"Using iterated cost function thing with chunk size {self.initial_cost_chunk_size}"
+		)

 		for x in range(0, len(sample_dipoles), self.initial_cost_chunk_size):
 			_logger.debug(f"doing chunk {x}")
-			raw_costs.extend(self.cost_function_to_use(sample_dipoles[x: x + self.initial_cost_chunk_size]))
+			raw_costs.extend(
+				self.cost_function_to_use(
+					sample_dipoles[x : x + self.initial_cost_chunk_size]
+				)
+			)
 		costs = numpy.array(raw_costs)

 		_logger.debug(f"costs: {costs}")
@@ -147,13 +153,12 @@ class SubsetSimulation:
 				stdevs = self.get_stdevs_from_arrays(next_seeds_as_array)
 				_logger.info(f"got stdevs: {stdevs.stdevs}")
 				all_long_chains = []
-				for seed_index, (c, s) in enumerate(next_seeds[::len(next_seeds) // 20]):
+				for seed_index, (c, s) in enumerate(
+					next_seeds[:: len(next_seeds) // 20]
+				):
 					# chain = mcmc(s, threshold_cost, n_s, model, dot_inputs_array, actual_measurement_array, mcmc_rng, curr_cost=c, stdevs=stdevs)
 					# until new version gotta do
-					_logger.debug(
-						f"\t{seed_index}: doing long chain on the next seed"
-					)
-				
+					_logger.debug(f"\t{seed_index}: doing long chain on the next seed")

 					long_chain = self.model.get_mcmc_chain(
 						s,
@@ -175,7 +180,6 @@ class SubsetSimulation:
 						delimiter=",",
 					)

-
 			if self.keep_probs_list:
 				for cost_index, cost_chain in enumerate(all_chains[: -self.n_c]):
 					probs_list.append(
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "deepdog"
-version = "0.7.4"
+version = "0.7.5"
 description = ""
 authors = ["Deepak Mallubhotla <dmallubhotla+github@gmail.com>"]

@@ -14,7 +14,7 @@ scipy = "1.10"
 pytest = ">=6"
 flake8 = "^4.0.1"
 pytest-cov = "^4.1.0"
-mypy = "^0.971"
+mypy = "^1.8"
 python-semantic-release = "^7.24.0"
 black = "^22.3.0"
 syrupy = "^4.0.8"
--- a/tests/test_bayes_run_with_ss.py
+++ b/tests/test_bayes_run_with_ss.py
@@ -151,7 +151,7 @@ def test_bayesss_with_tighter_cost(snapshot):
 		ss_default_upper_w_log_step=4,
 		ss_dump_last_generation=False,
 		write_output_to_bayesruncsv=False,
-		ss_initial_costs_chunk_size=1
+		ss_initial_costs_chunk_size=1,
 	)
 	result = square_run.go()
Author	SHA1	Message	Date
Renovate Bot	9d564fa2d3	chore(deps): update dependency mypy to v1 Some checks reported errors renovate/artifacts Artifact file update failure gitea-physics/deepdog/pipeline/pr-master Something is wrong with the build of this commit Details	2023-12-22 01:30:58 +00:00
Deepak Mallubhotla	310977e9b8	chore(release): 0.7.5 All checks were successful gitea-physics/deepdog/pipeline/head This commit looks good Details gitea-physics/deepdog/pipeline/tag This commit looks good Details	2023-12-09 16:27:30 -06:00
Deepak Mallubhotla	b10586bf55	fmt: auto format changes All checks were successful gitea-physics/deepdog/pipeline/head This commit looks good Details	2023-12-09 16:25:57 -06:00
Deepak Mallubhotla	1741807be4	feat: adds direct monte carlo package	2023-12-09 16:24:20 -06:00
Deepak Mallubhotla	9a4548def4	feat: allows disabling timestamp in subset simulation bayes results	2023-12-09 16:23:45 -06:00