diff --git a/deepdog/__init__.py b/deepdog/__init__.py index 3ca9496..13039a7 100644 --- a/deepdog/__init__.py +++ b/deepdog/__init__.py @@ -1,10 +1,7 @@ import logging from deepdog.meta import __version__ -from deepdog.bayes_run import BayesRun -from deepdog.bayes_run_simulpairs import BayesRunSimulPairs from deepdog.real_spectrum_run import RealSpectrumRun from deepdog.temp_aware_real_spectrum_run import TempAwareRealSpectrumRun -from deepdog.bayes_run_with_ss import BayesRunWithSubspaceSimulation def get_version(): @@ -13,11 +10,8 @@ def get_version(): __all__ = [ "get_version", - "BayesRun", - "BayesRunSimulPairs", "RealSpectrumRun", "TempAwareRealSpectrumRun", - "BayesRunWithSubspaceSimulation", ] diff --git a/deepdog/bayes_run.py b/deepdog/bayes_run.py deleted file mode 100644 index d50aeb5..0000000 --- a/deepdog/bayes_run.py +++ /dev/null @@ -1,281 +0,0 @@ -import pdme.inputs -import pdme.model -import pdme.measurement.input_types -import pdme.measurement.oscillating_dipole -import pdme.util.fast_v_calc -import pdme.util.fast_nonlocal_spectrum -from typing import Sequence, Tuple, List -import datetime -import csv -import multiprocessing -import logging -import numpy - - -# TODO: remove hardcode -CHUNKSIZE = 50 - -# TODO: It's garbage to have this here duplicated from pdme. -DotInput = Tuple[numpy.typing.ArrayLike, float] - - -_logger = logging.getLogger(__name__) - - -def get_a_result(input) -> int: - model, dot_inputs, lows, highs, monte_carlo_count, max_frequency, seed = input - - rng = numpy.random.default_rng(seed) - sample_dipoles = model.get_monte_carlo_dipole_inputs( - monte_carlo_count, max_frequency, rng_to_use=rng - ) - vals = pdme.util.fast_v_calc.fast_vs_for_dipoleses(dot_inputs, sample_dipoles) - return numpy.count_nonzero(pdme.util.fast_v_calc.between(vals, lows, highs)) - - -def get_a_result_using_pairs(input) -> int: - ( - model, - dot_inputs, - pair_inputs, - local_lows, - local_highs, - nonlocal_lows, - nonlocal_highs, - monte_carlo_count, - max_frequency, - ) = input - sample_dipoles = model.get_n_single_dipoles(monte_carlo_count, max_frequency) - local_vals = pdme.util.fast_v_calc.fast_vs_for_dipoles(dot_inputs, sample_dipoles) - local_matches = pdme.util.fast_v_calc.between(local_vals, local_lows, local_highs) - nonlocal_vals = pdme.util.fast_nonlocal_spectrum.fast_s_nonlocal( - pair_inputs, sample_dipoles - ) - nonlocal_matches = pdme.util.fast_v_calc.between( - nonlocal_vals, nonlocal_lows, nonlocal_highs - ) - combined_matches = numpy.logical_and(local_matches, nonlocal_matches) - return numpy.count_nonzero(combined_matches) - - -class BayesRun: - """ - A single Bayes run for a given set of dots. - - Parameters - ---------- - dot_inputs : Sequence[DotInput] - The dot inputs for this bayes run. - - models_with_names : Sequence[Tuple(str, pdme.model.DipoleModel)] - The models to evaluate. - - actual_model : pdme.model.DipoleModel - The model which is actually correct. - - filename_slug : str - The filename slug to include. - - run_count: int - The number of runs to do. - """ - - def __init__( - self, - dot_positions: Sequence[numpy.typing.ArrayLike], - frequency_range: Sequence[float], - models_with_names: Sequence[Tuple[str, pdme.model.DipoleModel]], - actual_model: pdme.model.DipoleModel, - filename_slug: str, - run_count: int = 100, - low_error: float = 0.9, - high_error: float = 1.1, - monte_carlo_count: int = 10000, - monte_carlo_cycles: int = 10, - target_success: int = 100, - max_monte_carlo_cycles_steps: int = 10, - max_frequency: float = 20, - end_threshold: float = None, - chunksize: int = CHUNKSIZE, - ) -> None: - self.dot_inputs = pdme.inputs.inputs_with_frequency_range( - dot_positions, frequency_range - ) - self.dot_inputs_array = pdme.measurement.input_types.dot_inputs_to_array( - self.dot_inputs - ) - - self.models = [model for (_, model) in models_with_names] - self.model_names = [name for (name, _) in models_with_names] - self.actual_model = actual_model - - self.n: int - try: - self.n = self.actual_model.n # type: ignore - except AttributeError: - self.n = 1 - - self.model_count = len(self.models) - self.monte_carlo_count = monte_carlo_count - self.monte_carlo_cycles = monte_carlo_cycles - self.target_success = target_success - self.max_monte_carlo_cycles_steps = max_monte_carlo_cycles_steps - self.run_count = run_count - self.low_error = low_error - self.high_error = high_error - - self.csv_fields = [] - for i in range(self.n): - self.csv_fields.extend( - [ - f"dipole_moment_{i+1}", - f"dipole_location_{i+1}", - f"dipole_frequency_{i+1}", - ] - ) - self.compensate_zeros = True - self.chunksize = chunksize - for name in self.model_names: - self.csv_fields.extend([f"{name}_success", f"{name}_count", f"{name}_prob"]) - - self.probabilities = [1 / self.model_count] * self.model_count - - timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") - self.filename = f"{timestamp}-{filename_slug}.bayesrun.csv" - self.max_frequency = max_frequency - - if end_threshold is not None: - if 0 < end_threshold < 1: - self.end_threshold: float = end_threshold - self.use_end_threshold = True - _logger.info(f"Will abort early, at {self.end_threshold}.") - else: - raise ValueError( - f"end_threshold should be between 0 and 1, but is actually {end_threshold}" - ) - - def go(self) -> None: - with open(self.filename, "a", newline="") as outfile: - writer = csv.DictWriter(outfile, fieldnames=self.csv_fields, dialect="unix") - writer.writeheader() - - for run in range(1, self.run_count + 1): - - # Generate the actual dipoles - actual_dipoles = self.actual_model.get_dipoles(self.max_frequency) - - dots = actual_dipoles.get_percent_range_dot_measurements( - self.dot_inputs, self.low_error, self.high_error - ) - ( - lows, - highs, - ) = pdme.measurement.input_types.dot_range_measurements_low_high_arrays( - dots - ) - - _logger.info(f"Going to work on dipole at {actual_dipoles.dipoles}") - - # define a new seed sequence for each run - seed_sequence = numpy.random.SeedSequence(run) - - results = [] - _logger.debug("Going to iterate over models now") - for model_count, model in enumerate(self.models): - _logger.debug(f"Doing model #{model_count}") - core_count = multiprocessing.cpu_count() - 1 or 1 - with multiprocessing.Pool(core_count) as pool: - cycle_count = 0 - cycle_success = 0 - cycles = 0 - while (cycles < self.max_monte_carlo_cycles_steps) and ( - cycle_success <= self.target_success - ): - _logger.debug(f"Starting cycle {cycles}") - cycles += 1 - current_success = 0 - cycle_count += self.monte_carlo_count * self.monte_carlo_cycles - - # generate a seed from the sequence for each core. - # note this needs to be inside the loop for monte carlo cycle steps! - # that way we get more stuff. - seeds = seed_sequence.spawn(self.monte_carlo_cycles) - - current_success = sum( - pool.imap_unordered( - get_a_result, - [ - ( - model, - self.dot_inputs_array, - lows, - highs, - self.monte_carlo_count, - self.max_frequency, - seed, - ) - for seed in seeds - ], - self.chunksize, - ) - ) - - cycle_success += current_success - _logger.debug(f"current running successes: {cycle_success}") - results.append((cycle_count, cycle_success)) - - _logger.debug("Done, constructing output now") - row = { - "dipole_moment_1": actual_dipoles.dipoles[0].p, - "dipole_location_1": actual_dipoles.dipoles[0].s, - "dipole_frequency_1": actual_dipoles.dipoles[0].w, - } - for i in range(1, self.n): - try: - current_dipoles = actual_dipoles.dipoles[i] - row[f"dipole_moment_{i+1}"] = current_dipoles.p - row[f"dipole_location_{i+1}"] = current_dipoles.s - row[f"dipole_frequency_{i+1}"] = current_dipoles.w - except IndexError: - _logger.info(f"Not writing anymore, saw end after {i}") - break - - successes: List[float] = [] - counts: List[int] = [] - for model_index, (name, (count, result)) in enumerate( - zip(self.model_names, results) - ): - - row[f"{name}_success"] = result - row[f"{name}_count"] = count - successes.append(max(result, 0.5)) - counts.append(count) - - success_weight = sum( - [ - (succ / count) * prob - for succ, count, prob in zip(successes, counts, self.probabilities) - ] - ) - new_probabilities = [ - (succ / count) * old_prob / success_weight - for succ, count, old_prob in zip(successes, counts, self.probabilities) - ] - self.probabilities = new_probabilities - for name, probability in zip(self.model_names, self.probabilities): - row[f"{name}_prob"] = probability - _logger.info(row) - - with open(self.filename, "a", newline="") as outfile: - writer = csv.DictWriter( - outfile, fieldnames=self.csv_fields, dialect="unix" - ) - writer.writerow(row) - - if self.use_end_threshold: - max_prob = max(self.probabilities) - if max_prob > self.end_threshold: - _logger.info( - f"Aborting early, because {max_prob} is greater than {self.end_threshold}" - ) - break diff --git a/deepdog/bayes_run_simulpairs.py b/deepdog/bayes_run_simulpairs.py deleted file mode 100644 index cffc9cd..0000000 --- a/deepdog/bayes_run_simulpairs.py +++ /dev/null @@ -1,382 +0,0 @@ -import pdme.inputs -import pdme.model -import pdme.measurement.input_types -import pdme.measurement.oscillating_dipole -import pdme.util.fast_v_calc -import pdme.util.fast_nonlocal_spectrum -from typing import Sequence, Tuple, List -import datetime -import csv -import multiprocessing -import logging -import numpy -import numpy.random - - -# TODO: remove hardcode -CHUNKSIZE = 50 - -# TODO: It's garbage to have this here duplicated from pdme. -DotInput = Tuple[numpy.typing.ArrayLike, float] - - -_logger = logging.getLogger(__name__) - - -def get_a_simul_result_using_pairs(input) -> numpy.ndarray: - ( - model, - dot_inputs, - pair_inputs, - local_lows, - local_highs, - nonlocal_lows, - nonlocal_highs, - monte_carlo_count, - monte_carlo_cycles, - max_frequency, - seed, - ) = input - - rng = numpy.random.default_rng(seed) - local_total = 0 - combined_total = 0 - - sample_dipoles = model.get_monte_carlo_dipole_inputs( - monte_carlo_count, max_frequency, rng_to_use=rng - ) - local_vals = pdme.util.fast_v_calc.fast_vs_for_dipoleses(dot_inputs, sample_dipoles) - local_matches = pdme.util.fast_v_calc.between(local_vals, local_lows, local_highs) - nonlocal_vals = pdme.util.fast_nonlocal_spectrum.fast_s_nonlocal_dipoleses( - pair_inputs, sample_dipoles - ) - nonlocal_matches = pdme.util.fast_v_calc.between( - nonlocal_vals, nonlocal_lows, nonlocal_highs - ) - combined_matches = numpy.logical_and(local_matches, nonlocal_matches) - - local_total += numpy.count_nonzero(local_matches) - combined_total += numpy.count_nonzero(combined_matches) - return numpy.array([local_total, combined_total]) - - -class BayesRunSimulPairs: - """ - A dual pairs-nonpairs Bayes run for a given set of dots. - - Parameters - ---------- - dot_inputs : Sequence[DotInput] - The dot inputs for this bayes run. - - models_with_names : Sequence[Tuple(str, pdme.model.DipoleModel)] - The models to evaluate. - - actual_model : pdme.model.DipoleModel - The modoel for the model which is actually correct. - - filename_slug : str - The filename slug to include. - - run_count: int - The number of runs to do. - """ - - def __init__( - self, - dot_positions: Sequence[numpy.typing.ArrayLike], - frequency_range: Sequence[float], - models_with_names: Sequence[Tuple[str, pdme.model.DipoleModel]], - actual_model: pdme.model.DipoleModel, - filename_slug: str, - run_count: int = 100, - low_error: float = 0.9, - high_error: float = 1.1, - pairs_high_error=None, - pairs_low_error=None, - monte_carlo_count: int = 10000, - monte_carlo_cycles: int = 10, - target_success: int = 100, - max_monte_carlo_cycles_steps: int = 10, - max_frequency: float = 20, - end_threshold: float = None, - chunksize: int = CHUNKSIZE, - ) -> None: - self.dot_inputs = pdme.inputs.inputs_with_frequency_range( - dot_positions, frequency_range - ) - self.dot_inputs_array = pdme.measurement.input_types.dot_inputs_to_array( - self.dot_inputs - ) - - self.dot_pair_inputs = pdme.inputs.input_pairs_with_frequency_range( - dot_positions, frequency_range - ) - self.dot_pair_inputs_array = ( - pdme.measurement.input_types.dot_pair_inputs_to_array(self.dot_pair_inputs) - ) - - self.models = [mod for (_, mod) in models_with_names] - self.model_names = [name for (name, _) in models_with_names] - self.actual_model = actual_model - - self.n: int - try: - self.n = self.actual_model.n # type: ignore - except AttributeError: - self.n = 1 - - self.model_count = len(self.models) - self.monte_carlo_count = monte_carlo_count - self.monte_carlo_cycles = monte_carlo_cycles - self.target_success = target_success - self.max_monte_carlo_cycles_steps = max_monte_carlo_cycles_steps - self.run_count = run_count - self.low_error = low_error - self.high_error = high_error - if pairs_low_error is None: - self.pairs_low_error = self.low_error - else: - self.pairs_low_error = pairs_low_error - if pairs_high_error is None: - self.pairs_high_error = self.high_error - else: - self.pairs_high_error = pairs_high_error - - self.csv_fields = [] - for i in range(self.n): - self.csv_fields.extend( - [ - f"dipole_moment_{i+1}", - f"dipole_location_{i+1}", - f"dipole_frequency_{i+1}", - ] - ) - self.compensate_zeros = True - self.chunksize = chunksize - for name in self.model_names: - self.csv_fields.extend([f"{name}_success", f"{name}_count", f"{name}_prob"]) - - self.probabilities_no_pairs = [1 / self.model_count] * self.model_count - self.probabilities_pairs = [1 / self.model_count] * self.model_count - - timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") - self.filename_pairs = f"{timestamp}-{filename_slug}.simulpairs.yespairs.csv" - self.filename_no_pairs = f"{timestamp}-{filename_slug}.simulpairs.noopairs.csv" - - self.max_frequency = max_frequency - - if end_threshold is not None: - if 0 < end_threshold < 1: - self.end_threshold: float = end_threshold - self.use_end_threshold = True - _logger.info(f"Will abort early, at {self.end_threshold}.") - else: - raise ValueError( - f"end_threshold should be between 0 and 1, but is actually {end_threshold}" - ) - - def go(self) -> None: - with open(self.filename_pairs, "a", newline="") as outfile: - writer = csv.DictWriter(outfile, fieldnames=self.csv_fields, dialect="unix") - writer.writeheader() - with open(self.filename_no_pairs, "a", newline="") as outfile: - writer = csv.DictWriter(outfile, fieldnames=self.csv_fields, dialect="unix") - writer.writeheader() - - for run in range(1, self.run_count + 1): - - # Generate the actual dipoles - actual_dipoles = self.actual_model.get_dipoles(self.max_frequency) - - dots = actual_dipoles.get_percent_range_dot_measurements( - self.dot_inputs, self.low_error, self.high_error - ) - ( - lows, - highs, - ) = pdme.measurement.input_types.dot_range_measurements_low_high_arrays( - dots - ) - - pair_lows, pair_highs = (None, None) - pair_measurements = actual_dipoles.get_percent_range_dot_pair_measurements( - self.dot_pair_inputs, self.pairs_low_error, self.pairs_high_error - ) - ( - pair_lows, - pair_highs, - ) = pdme.measurement.input_types.dot_range_measurements_low_high_arrays( - pair_measurements - ) - - _logger.info(f"Going to work on dipole at {actual_dipoles.dipoles}") - - # define a new seed sequence for each run - seed_sequence = numpy.random.SeedSequence(run) - - results_pairs = [] - results_no_pairs = [] - _logger.debug("Going to iterate over models now") - for model_count, model in enumerate(self.models): - _logger.debug(f"Doing model #{model_count}") - - core_count = multiprocessing.cpu_count() - 1 or 1 - with multiprocessing.Pool(core_count) as pool: - cycle_count = 0 - cycle_success_pairs = 0 - cycle_success_no_pairs = 0 - cycles = 0 - while (cycles < self.max_monte_carlo_cycles_steps) and ( - min(cycle_success_pairs, cycle_success_no_pairs) - <= self.target_success - ): - _logger.debug(f"Starting cycle {cycles}") - - cycles += 1 - current_success_pairs = 0 - current_success_no_pairs = 0 - cycle_count += self.monte_carlo_count * self.monte_carlo_cycles - - # generate a seed from the sequence for each core. - # note this needs to be inside the loop for monte carlo cycle steps! - # that way we get more stuff. - - seeds = seed_sequence.spawn(self.monte_carlo_cycles) - _logger.debug(f"Creating {self.monte_carlo_cycles} seeds") - current_success_both = numpy.array( - sum( - pool.imap_unordered( - get_a_simul_result_using_pairs, - [ - ( - model, - self.dot_inputs_array, - self.dot_pair_inputs_array, - lows, - highs, - pair_lows, - pair_highs, - self.monte_carlo_count, - self.monte_carlo_cycles, - self.max_frequency, - seed, - ) - for seed in seeds - ], - self.chunksize, - ) - ) - ) - current_success_no_pairs = current_success_both[0] - current_success_pairs = current_success_both[1] - - cycle_success_no_pairs += current_success_no_pairs - cycle_success_pairs += current_success_pairs - _logger.debug( - f"(pair, no_pair) successes are {(cycle_success_pairs, cycle_success_no_pairs)}" - ) - results_pairs.append((cycle_count, cycle_success_pairs)) - results_no_pairs.append((cycle_count, cycle_success_no_pairs)) - - _logger.debug("Done, constructing output now") - row_pairs = { - "dipole_moment_1": actual_dipoles.dipoles[0].p, - "dipole_location_1": actual_dipoles.dipoles[0].s, - "dipole_frequency_1": actual_dipoles.dipoles[0].w, - } - row_no_pairs = { - "dipole_moment_1": actual_dipoles.dipoles[0].p, - "dipole_location_1": actual_dipoles.dipoles[0].s, - "dipole_frequency_1": actual_dipoles.dipoles[0].w, - } - for i in range(1, self.n): - try: - current_dipoles = actual_dipoles.dipoles[i] - row_pairs[f"dipole_moment_{i+1}"] = current_dipoles.p - row_pairs[f"dipole_location_{i+1}"] = current_dipoles.s - row_pairs[f"dipole_frequency_{i+1}"] = current_dipoles.w - row_no_pairs[f"dipole_moment_{i+1}"] = current_dipoles.p - row_no_pairs[f"dipole_location_{i+1}"] = current_dipoles.s - row_no_pairs[f"dipole_frequency_{i+1}"] = current_dipoles.w - except IndexError: - _logger.info(f"Not writing anymore, saw end after {i}") - break - - successes_pairs: List[float] = [] - successes_no_pairs: List[float] = [] - counts: List[int] = [] - for model_index, ( - name, - (count_pair, result_pair), - (count_no_pair, result_no_pair), - ) in enumerate(zip(self.model_names, results_pairs, results_no_pairs)): - - row_pairs[f"{name}_success"] = result_pair - row_pairs[f"{name}_count"] = count_pair - successes_pairs.append(max(result_pair, 0.5)) - - row_no_pairs[f"{name}_success"] = result_no_pair - row_no_pairs[f"{name}_count"] = count_no_pair - successes_no_pairs.append(max(result_no_pair, 0.5)) - - counts.append(count_pair) - - success_weight_pair = sum( - [ - (succ / count) * prob - for succ, count, prob in zip( - successes_pairs, counts, self.probabilities_pairs - ) - ] - ) - success_weight_no_pair = sum( - [ - (succ / count) * prob - for succ, count, prob in zip( - successes_no_pairs, counts, self.probabilities_no_pairs - ) - ] - ) - new_probabilities_pair = [ - (succ / count) * old_prob / success_weight_pair - for succ, count, old_prob in zip( - successes_pairs, counts, self.probabilities_pairs - ) - ] - new_probabilities_no_pair = [ - (succ / count) * old_prob / success_weight_no_pair - for succ, count, old_prob in zip( - successes_no_pairs, counts, self.probabilities_no_pairs - ) - ] - self.probabilities_pairs = new_probabilities_pair - self.probabilities_no_pairs = new_probabilities_no_pair - for name, probability_pair, probability_no_pair in zip( - self.model_names, self.probabilities_pairs, self.probabilities_no_pairs - ): - row_pairs[f"{name}_prob"] = probability_pair - row_no_pairs[f"{name}_prob"] = probability_no_pair - _logger.debug(row_pairs) - _logger.debug(row_no_pairs) - - with open(self.filename_pairs, "a", newline="") as outfile: - writer = csv.DictWriter( - outfile, fieldnames=self.csv_fields, dialect="unix" - ) - writer.writerow(row_pairs) - with open(self.filename_no_pairs, "a", newline="") as outfile: - writer = csv.DictWriter( - outfile, fieldnames=self.csv_fields, dialect="unix" - ) - writer.writerow(row_no_pairs) - - if self.use_end_threshold: - max_prob = min( - max(self.probabilities_pairs), max(self.probabilities_no_pairs) - ) - if max_prob > self.end_threshold: - _logger.info( - f"Aborting early, because {max_prob} is greater than {self.end_threshold}" - ) - break diff --git a/deepdog/bayes_run_with_ss.py b/deepdog/bayes_run_with_ss.py deleted file mode 100644 index 24361b9..0000000 --- a/deepdog/bayes_run_with_ss.py +++ /dev/null @@ -1,261 +0,0 @@ -import deepdog.subset_simulation -import pdme.inputs -import pdme.model -import pdme.measurement.input_types -import pdme.measurement.oscillating_dipole -import pdme.util.fast_v_calc -import pdme.util.fast_nonlocal_spectrum -from typing import Sequence, Tuple, List, Optional -import datetime -import csv -import logging -import numpy -import numpy.typing - - -# TODO: remove hardcode -CHUNKSIZE = 50 - -# TODO: It's garbage to have this here duplicated from pdme. -DotInput = Tuple[numpy.typing.ArrayLike, float] - - -CLAMPING_FACTOR = 10 - -_logger = logging.getLogger(__name__) - - -class BayesRunWithSubspaceSimulation: - """ - A single Bayes run for a given set of dots. - - Parameters - ---------- - dot_inputs : Sequence[DotInput] - The dot inputs for this bayes run. - - models_with_names : Sequence[Tuple(str, pdme.model.DipoleModel)] - The models to evaluate. - - actual_model : pdme.model.DipoleModel - The model which is actually correct. - - filename_slug : str - The filename slug to include. - - run_count: int - The number of runs to do. - """ - - def __init__( - self, - dot_positions: Sequence[numpy.typing.ArrayLike], - frequency_range: Sequence[float], - models_with_names: Sequence[Tuple[str, pdme.model.DipoleModel]], - actual_model: pdme.model.DipoleModel, - filename_slug: str, - max_frequency: float = 20, - end_threshold: float = None, - run_count=100, - chunksize: int = CHUNKSIZE, - ss_n_c: int = 500, - ss_n_s: int = 100, - ss_m_max: int = 15, - ss_target_cost: Optional[float] = None, - ss_level_0_seed: int = 200, - ss_mcmc_seed: int = 20, - ss_use_adaptive_steps=True, - ss_default_phi_step=0.01, - ss_default_theta_step=0.01, - ss_default_r_step=0.01, - ss_default_w_log_step=0.01, - ss_default_upper_w_log_step=4, - ss_dump_last_generation=False, - ss_initial_costs_chunk_size=100, - write_output_to_bayesruncsv=True, - use_timestamp_for_output=True, - ) -> None: - self.dot_inputs = pdme.inputs.inputs_with_frequency_range( - dot_positions, frequency_range - ) - self.dot_inputs_array = pdme.measurement.input_types.dot_inputs_to_array( - self.dot_inputs - ) - - self.models_with_names = models_with_names - self.models = [model for (_, model) in models_with_names] - self.model_names = [name for (name, _) in models_with_names] - self.actual_model = actual_model - - self.n: int - try: - self.n = self.actual_model.n # type: ignore - except AttributeError: - self.n = 1 - - self.model_count = len(self.models) - - self.csv_fields = [] - for i in range(self.n): - self.csv_fields.extend( - [ - f"dipole_moment_{i+1}", - f"dipole_location_{i+1}", - f"dipole_frequency_{i+1}", - ] - ) - self.compensate_zeros = True - self.chunksize = chunksize - for name in self.model_names: - self.csv_fields.extend([f"{name}_likelihood", f"{name}_prob"]) - - self.probabilities = [1 / self.model_count] * self.model_count - - if use_timestamp_for_output: - timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") - self.filename = f"{timestamp}-{filename_slug}.bayesrunwithss.csv" - else: - self.filename = f"{filename_slug}.bayesrunwithss.csv" - self.max_frequency = max_frequency - - if end_threshold is not None: - if 0 < end_threshold < 1: - self.end_threshold: float = end_threshold - self.use_end_threshold = True - _logger.info(f"Will abort early, at {self.end_threshold}.") - else: - raise ValueError( - f"end_threshold should be between 0 and 1, but is actually {end_threshold}" - ) - - self.ss_n_c = ss_n_c - self.ss_n_s = ss_n_s - self.ss_m_max = ss_m_max - self.ss_target_cost = ss_target_cost - self.ss_level_0_seed = ss_level_0_seed - self.ss_mcmc_seed = ss_mcmc_seed - self.ss_use_adaptive_steps = ss_use_adaptive_steps - self.ss_default_phi_step = ss_default_phi_step - self.ss_default_theta_step = ss_default_theta_step - self.ss_default_r_step = ss_default_r_step - self.ss_default_w_log_step = ss_default_w_log_step - self.ss_default_upper_w_log_step = ss_default_upper_w_log_step - self.ss_dump_last_generation = ss_dump_last_generation - self.ss_initial_costs_chunk_size = ss_initial_costs_chunk_size - self.run_count = run_count - - self.write_output_to_csv = write_output_to_bayesruncsv - - def go(self) -> Sequence: - - if self.write_output_to_csv: - with open(self.filename, "a", newline="") as outfile: - writer = csv.DictWriter( - outfile, fieldnames=self.csv_fields, dialect="unix" - ) - writer.writeheader() - - return_result = [] - - for run in range(1, self.run_count + 1): - - # Generate the actual dipoles - actual_dipoles = self.actual_model.get_dipoles(self.max_frequency) - - measurements = actual_dipoles.get_dot_measurements(self.dot_inputs) - - _logger.info(f"Going to work on dipole at {actual_dipoles.dipoles}") - - # define a new seed sequence for each run - - results = [] - _logger.debug("Going to iterate over models now") - for model_count, model in enumerate(self.models_with_names): - _logger.debug(f"Doing model #{model_count}, {model[0]}") - subset_run = deepdog.subset_simulation.SubsetSimulation( - model, - self.dot_inputs, - measurements, - self.ss_n_c, - self.ss_n_s, - self.ss_m_max, - self.ss_target_cost, - self.ss_level_0_seed, - self.ss_mcmc_seed, - self.ss_use_adaptive_steps, - self.ss_default_phi_step, - self.ss_default_theta_step, - self.ss_default_r_step, - self.ss_default_w_log_step, - self.ss_default_upper_w_log_step, - initial_cost_chunk_size=self.ss_initial_costs_chunk_size, - keep_probs_list=False, - dump_last_generation_to_file=self.ss_dump_last_generation, - ) - results.append(subset_run.execute()) - - _logger.debug("Done, constructing output now") - row = { - "dipole_moment_1": actual_dipoles.dipoles[0].p, - "dipole_location_1": actual_dipoles.dipoles[0].s, - "dipole_frequency_1": actual_dipoles.dipoles[0].w, - } - for i in range(1, self.n): - try: - current_dipoles = actual_dipoles.dipoles[i] - row[f"dipole_moment_{i+1}"] = current_dipoles.p - row[f"dipole_location_{i+1}"] = current_dipoles.s - row[f"dipole_frequency_{i+1}"] = current_dipoles.w - except IndexError: - _logger.info(f"Not writing anymore, saw end after {i}") - break - - likelihoods: List[float] = [] - - for (name, result) in zip(self.model_names, results): - if result.over_target_likelihood is None: - if result.lowest_likelihood is None: - _logger.error(f"result {result} looks bad") - clamped_likelihood = 10**-15 - else: - clamped_likelihood = result.lowest_likelihood / CLAMPING_FACTOR - _logger.warning( - f"got a none result, clamping to {clamped_likelihood}" - ) - else: - clamped_likelihood = result.over_target_likelihood - likelihoods.append(clamped_likelihood) - row[f"{name}_likelihood"] = clamped_likelihood - - success_weight = sum( - [ - likelihood * prob - for likelihood, prob in zip(likelihoods, self.probabilities) - ] - ) - new_probabilities = [ - likelihood * old_prob / success_weight - for likelihood, old_prob in zip(likelihoods, self.probabilities) - ] - self.probabilities = new_probabilities - for name, probability in zip(self.model_names, self.probabilities): - row[f"{name}_prob"] = probability - _logger.info(row) - return_result.append(row) - - if self.write_output_to_csv: - with open(self.filename, "a", newline="") as outfile: - writer = csv.DictWriter( - outfile, fieldnames=self.csv_fields, dialect="unix" - ) - writer.writerow(row) - - if self.use_end_threshold: - max_prob = max(self.probabilities) - if max_prob > self.end_threshold: - _logger.info( - f"Aborting early, because {max_prob} is greater than {self.end_threshold}" - ) - break - - return return_result diff --git a/tests/test_bayes_run_with_ss.py b/tests/test_bayes_run_with_ss.py deleted file mode 100644 index cbde764..0000000 --- a/tests/test_bayes_run_with_ss.py +++ /dev/null @@ -1,158 +0,0 @@ -import deepdog -import logging -import logging.config - -import numpy.random - -from pdme.model import ( - LogSpacedRandomCountMultipleDipoleFixedMagnitudeModel, - LogSpacedRandomCountMultipleDipoleFixedMagnitudeXYModel, - LogSpacedRandomCountMultipleDipoleFixedMagnitudeFixedOrientationModel, -) - - -_logger = logging.getLogger(__name__) - - -def fixed_z_model_func( - xmin, - xmax, - ymin, - ymax, - zmin, - zmax, - wexp_min, - wexp_max, - pfixed, - n_max, - prob_occupancy, -): - return LogSpacedRandomCountMultipleDipoleFixedMagnitudeFixedOrientationModel( - xmin, - xmax, - ymin, - ymax, - zmin, - zmax, - wexp_min, - wexp_max, - pfixed, - 0, - 0, - n_max, - prob_occupancy, - ) - - -def get_model(orientation): - model_funcs = { - "fixedz": fixed_z_model_func, - "free": LogSpacedRandomCountMultipleDipoleFixedMagnitudeModel, - "fixedxy": LogSpacedRandomCountMultipleDipoleFixedMagnitudeXYModel, - } - model = model_funcs[orientation]( - -10, - 10, - -17.5, - 17.5, - 5, - 7.5, - -5, - 6.5, - 10**3, - 2, - 0.99999999, - ) - model.n = 2 - model.rng = numpy.random.default_rng(1234) - - return ( - f"connors_geom-5height-orientation_{orientation}-pfixexp_{3}-dipole_count_{2}", - model, - ) - - -def test_basic_analysis(snapshot): - - dot_positions = [[0, 0, 0], [0, 1, 0]] - - freqs = [1, 10, 100] - models = [] - - orientations = ["free", "fixedxy", "fixedz"] - for orientation in orientations: - models.append(get_model(orientation)) - - _logger.info(f"have {len(models)} models to look at") - if len(models) == 1: - _logger.info(f"only one model, name: {models[0][0]}") - - square_run = deepdog.BayesRunWithSubspaceSimulation( - dot_positions, - freqs, - models, - models[0][1], - filename_slug="test", - end_threshold=0.9, - ss_n_c=5, - ss_n_s=2, - ss_m_max=10, - ss_target_cost=150, - ss_level_0_seed=200, - ss_mcmc_seed=20, - ss_use_adaptive_steps=True, - ss_default_phi_step=0.01, - ss_default_theta_step=0.01, - ss_default_r_step=0.01, - ss_default_w_log_step=0.01, - ss_default_upper_w_log_step=4, - ss_dump_last_generation=False, - write_output_to_bayesruncsv=False, - ss_initial_costs_chunk_size=1000, - ) - result = square_run.go() - - assert result == snapshot - - -def test_bayesss_with_tighter_cost(snapshot): - - dot_positions = [[0, 0, 0], [0, 1, 0]] - - freqs = [1, 10, 100] - models = [] - - orientations = ["free", "fixedxy", "fixedz"] - for orientation in orientations: - models.append(get_model(orientation)) - - _logger.info(f"have {len(models)} models to look at") - if len(models) == 1: - _logger.info(f"only one model, name: {models[0][0]}") - - square_run = deepdog.BayesRunWithSubspaceSimulation( - dot_positions, - freqs, - models, - models[0][1], - filename_slug="test", - end_threshold=0.9, - ss_n_c=5, - ss_n_s=2, - ss_m_max=10, - ss_target_cost=1.5, - ss_level_0_seed=200, - ss_mcmc_seed=20, - ss_use_adaptive_steps=True, - ss_default_phi_step=0.01, - ss_default_theta_step=0.01, - ss_default_r_step=0.01, - ss_default_w_log_step=0.01, - ss_default_upper_w_log_step=4, - ss_dump_last_generation=False, - write_output_to_bayesruncsv=False, - ss_initial_costs_chunk_size=1, - ) - result = square_run.go() - - assert result == snapshot