diff --git a/deepdog/__init__.py b/deepdog/__init__.py
index f8ad9bb..e37fb53 100644
--- a/deepdog/__init__.py
+++ b/deepdog/__init__.py
@@ -1,9 +1,7 @@
 import logging
 from deepdog.meta import __version__
 from deepdog.bayes_run import BayesRun
-from deepdog.alt_bayes_run import AltBayesRun
-from deepdog.alt_bayes_run_simulpairs import AltBayesRunSimulPairs
-from deepdog.diagnostic import Diagnostic
+from deepdog.bayes_run_simulpairs import BayesRunSimulPairs
 
 
 def get_version():
@@ -13,9 +11,7 @@ def get_version():
 __all__ = [
 	"get_version",
 	"BayesRun",
-	"AltBayesRun",
-	"AltBayesRunSimulPairs",
-	"Diagnostic",
+	"BayesRunSimulPairs",
 ]
 
 
diff --git a/deepdog/alt_bayes_run.py b/deepdog/alt_bayes_run.py
deleted file mode 100644
index b16b1b8..0000000
--- a/deepdog/alt_bayes_run.py
+++ /dev/null
@@ -1,307 +0,0 @@
-import pdme.inputs
-import pdme.model
-import pdme.measurement.input_types
-import pdme.measurement.oscillating_dipole
-import pdme.util.fast_v_calc
-import pdme.util.fast_nonlocal_spectrum
-from typing import Sequence, Tuple, List
-import datetime
-import csv
-import multiprocessing
-import logging
-import numpy
-
-
-# TODO: remove hardcode
-CHUNKSIZE = 50
-
-# TODO: It's garbage to have this here duplicated from pdme.
-DotInput = Tuple[numpy.typing.ArrayLike, float]
-
-
-_logger = logging.getLogger(__name__)
-
-
-def get_a_result(input) -> int:
-	discretisation, dot_inputs, lows, highs, monte_carlo_count, max_frequency = input
-	sample_dipoles = discretisation.get_model().get_n_single_dipoles(
-		monte_carlo_count, max_frequency
-	)
-	vals = pdme.util.fast_v_calc.fast_vs_for_dipoles(dot_inputs, sample_dipoles)
-	return numpy.count_nonzero(pdme.util.fast_v_calc.between(vals, lows, highs))
-
-
-def get_a_result_using_pairs(input) -> int:
-	(
-		discretisation,
-		dot_inputs,
-		pair_inputs,
-		local_lows,
-		local_highs,
-		nonlocal_lows,
-		nonlocal_highs,
-		monte_carlo_count,
-		max_frequency,
-	) = input
-	sample_dipoles = discretisation.get_model().get_n_single_dipoles(
-		monte_carlo_count, max_frequency
-	)
-	local_vals = pdme.util.fast_v_calc.fast_vs_for_dipoles(dot_inputs, sample_dipoles)
-	local_matches = pdme.util.fast_v_calc.between(local_vals, local_lows, local_highs)
-	nonlocal_vals = pdme.util.fast_nonlocal_spectrum.fast_s_nonlocal(
-		pair_inputs, sample_dipoles
-	)
-	nonlocal_matches = pdme.util.fast_v_calc.between(
-		nonlocal_vals, nonlocal_lows, nonlocal_highs
-	)
-	combined_matches = numpy.logical_and(local_matches, nonlocal_matches)
-	return numpy.count_nonzero(combined_matches)
-
-
-class AltBayesRun:
-	"""
-	A single Bayes run for a given set of dots.
-
-	Parameters
-	----------
-	dot_inputs : Sequence[DotInput]
-		The dot inputs for this bayes run.
-
-	discretisations_with_names : Sequence[Tuple(str, pdme.model.Model)]
-		The models to evaluate.
-
-	actual_model_discretisation : pdme.model.Discretisation
-		The discretisation for the model which is actually correct.
-
-	filename_slug : str
-		The filename slug to include.
-
-	run_count: int
-		The number of runs to do.
- """ - - def __init__( - self, - dot_positions: Sequence[numpy.typing.ArrayLike], - frequency_range: Sequence[float], - discretisations_with_names: Sequence[Tuple[str, pdme.model.Discretisation]], - actual_model: pdme.model.Model, - filename_slug: str, - run_count: int = 100, - low_error: float = 0.9, - high_error: float = 1.1, - pairs_high_error=None, - pairs_low_error=None, - monte_carlo_count: int = 10000, - monte_carlo_cycles: int = 10, - target_success: int = 100, - max_monte_carlo_cycles_steps: int = 10, - max_frequency: float = 20, - end_threshold: float = None, - chunksize: int = CHUNKSIZE, - use_pairs: bool = False, - ) -> None: - self.dot_inputs = pdme.inputs.inputs_with_frequency_range( - dot_positions, frequency_range - ) - self.dot_inputs_array = pdme.measurement.input_types.dot_inputs_to_array( - self.dot_inputs - ) - - self.use_pairs = use_pairs - - self.dot_pair_inputs = pdme.inputs.input_pairs_with_frequency_range( - dot_positions, frequency_range - ) - self.dot_pair_inputs_array = ( - pdme.measurement.input_types.dot_pair_inputs_to_array(self.dot_pair_inputs) - ) - - self.discretisations = [disc for (_, disc) in discretisations_with_names] - self.model_names = [name for (name, _) in discretisations_with_names] - self.actual_model = actual_model - self.model_count = len(self.discretisations) - self.monte_carlo_count = monte_carlo_count - self.monte_carlo_cycles = monte_carlo_cycles - self.target_success = target_success - self.max_monte_carlo_cycles_steps = max_monte_carlo_cycles_steps - self.run_count = run_count - self.low_error = low_error - self.high_error = high_error - if pairs_low_error is None: - self.pairs_low_error = self.low_error - else: - self.pairs_low_error = pairs_low_error - if pairs_high_error is None: - self.pairs_high_error = self.high_error - else: - self.pairs_high_error = pairs_high_error - self.csv_fields = ["dipole_moment", "dipole_location", "dipole_frequency"] - self.compensate_zeros = True - self.chunksize = chunksize - for name in self.model_names: - self.csv_fields.extend([f"{name}_success", f"{name}_count", f"{name}_prob"]) - - self.probabilities = [1 / self.model_count] * self.model_count - - timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") - if self.use_pairs: - self.filename = f"{timestamp}-{filename_slug}.altbayes.pairs.csv" - else: - self.filename = f"{timestamp}-{filename_slug}.altbayes.csv" - self.max_frequency = max_frequency - - if end_threshold is not None: - if 0 < end_threshold < 1: - self.end_threshold: float = end_threshold - self.use_end_threshold = True - _logger.info(f"Will abort early, at {self.end_threshold}.") - else: - raise ValueError( - f"end_threshold should be between 0 and 1, but is actually {end_threshold}" - ) - - def go(self) -> None: - with open(self.filename, "a", newline="") as outfile: - writer = csv.DictWriter(outfile, fieldnames=self.csv_fields, dialect="unix") - writer.writeheader() - - for run in range(1, self.run_count + 1): - - rng = numpy.random.default_rng() - frequency = rng.uniform(1, self.max_frequency) - - # Generate the actual dipoles - actual_dipoles = self.actual_model.get_dipoles(frequency) - - dots = actual_dipoles.get_percent_range_dot_measurements( - self.dot_inputs, self.low_error, self.high_error - ) - ( - lows, - highs, - ) = pdme.measurement.input_types.dot_range_measurements_low_high_arrays( - dots - ) - - pair_lows, pair_highs = (None, None) - if self.use_pairs: - pair_measurements = ( - actual_dipoles.get_percent_range_dot_pair_measurements( - self.dot_pair_inputs, - 
self.pairs_low_error, - self.pairs_high_error, - ) - ) - ( - pair_lows, - pair_highs, - ) = pdme.measurement.input_types.dot_range_measurements_low_high_arrays( - pair_measurements - ) - - _logger.info(f"Going to work on dipole at {actual_dipoles.dipoles}") - - results = [] - _logger.debug("Going to iterate over discretisations now") - for disc_count, discretisation in enumerate(self.discretisations): - _logger.debug(f"Doing discretisation #{disc_count}") - with multiprocessing.Pool(multiprocessing.cpu_count() - 1 or 1) as pool: - cycle_count = 0 - cycle_success = 0 - cycles = 0 - while (cycles < self.max_monte_carlo_cycles_steps) and ( - cycle_success <= self.target_success - ): - _logger.debug(f"Starting cycle {cycles}") - cycles += 1 - current_success = 0 - cycle_count += self.monte_carlo_count * self.monte_carlo_cycles - if self.use_pairs: - current_success = sum( - pool.imap_unordered( - get_a_result_using_pairs, - [ - ( - discretisation, - self.dot_inputs_array, - self.dot_pair_inputs_array, - lows, - highs, - pair_lows, - pair_highs, - self.monte_carlo_count, - self.max_frequency, - ) - ] - * self.monte_carlo_cycles, - self.chunksize, - ) - ) - else: - current_success = sum( - pool.imap_unordered( - get_a_result, - [ - ( - discretisation, - self.dot_inputs_array, - lows, - highs, - self.monte_carlo_count, - self.max_frequency, - ) - ] - * self.monte_carlo_cycles, - self.chunksize, - ) - ) - - cycle_success += current_success - results.append((cycle_count, cycle_success)) - - _logger.debug("Done, constructing output now") - row = { - "dipole_moment": actual_dipoles.dipoles[0].p, - "dipole_location": actual_dipoles.dipoles[0].s, - "dipole_frequency": actual_dipoles.dipoles[0].w, - } - successes: List[float] = [] - counts: List[int] = [] - for model_index, (name, (count, result)) in enumerate( - zip(self.model_names, results) - ): - - row[f"{name}_success"] = result - row[f"{name}_count"] = count - successes.append(max(result, 0.5)) - counts.append(count) - - success_weight = sum( - [ - (succ / count) * prob - for succ, count, prob in zip(successes, counts, self.probabilities) - ] - ) - new_probabilities = [ - (succ / count) * old_prob / success_weight - for succ, count, old_prob in zip(successes, counts, self.probabilities) - ] - self.probabilities = new_probabilities - for name, probability in zip(self.model_names, self.probabilities): - row[f"{name}_prob"] = probability - _logger.info(row) - - with open(self.filename, "a", newline="") as outfile: - writer = csv.DictWriter( - outfile, fieldnames=self.csv_fields, dialect="unix" - ) - writer.writerow(row) - - if self.use_end_threshold: - max_prob = max(self.probabilities) - if max_prob > self.end_threshold: - _logger.info( - f"Aborting early, because {max_prob} is greater than {self.end_threshold}" - ) - break diff --git a/deepdog/bayes_run.py b/deepdog/bayes_run.py index 1270944..d50aeb5 100644 --- a/deepdog/bayes_run.py +++ b/deepdog/bayes_run.py @@ -1,17 +1,19 @@ +import pdme.inputs import pdme.model +import pdme.measurement.input_types +import pdme.measurement.oscillating_dipole +import pdme.util.fast_v_calc +import pdme.util.fast_nonlocal_spectrum from typing import Sequence, Tuple, List import datetime -import itertools import csv +import multiprocessing import logging import numpy -import scipy.optimize -import multiprocessing # TODO: remove hardcode -COST_THRESHOLD = 1e-10 - +CHUNKSIZE = 50 # TODO: It's garbage to have this here duplicated from pdme. 
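Both the deleted AltBayesRun.get_a_result above and the rewritten bayes_run.get_a_result below use the same acceptance test: a Monte Carlo dipole sample counts as a success only if every one of its predicted dot measurements falls inside the [low, high] band built from the noisy reference measurements. A minimal, self-contained sketch of that counting step; the between helper here is an illustrative stand-in for pdme.util.fast_v_calc.between, not the pdme implementation:

    import numpy

    def between(vals, lows, highs):
        # Keep a sample only if every predicted measurement sits inside its band.
        return numpy.all((vals > lows) & (vals < highs), axis=-1)

    rng = numpy.random.default_rng(0)
    lows = numpy.array([0.2, 0.2, 0.2])       # lower bounds for three dots
    highs = numpy.array([0.8, 0.8, 0.8])      # upper bounds for three dots
    vals = rng.uniform(0, 1, size=(1000, 3))  # 1000 sampled dipole configurations

    matches = between(vals, lows, highs)
    print(numpy.count_nonzero(matches))       # successes, cf. get_a_result's return value

The resulting count feeds the success/count ratios that drive the Bayes update further down.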
diff --git a/deepdog/bayes_run.py b/deepdog/bayes_run.py
index 1270944..d50aeb5 100644
--- a/deepdog/bayes_run.py
+++ b/deepdog/bayes_run.py
@@ -1,17 +1,19 @@
+import pdme.inputs
 import pdme.model
+import pdme.measurement.input_types
+import pdme.measurement.oscillating_dipole
+import pdme.util.fast_v_calc
+import pdme.util.fast_nonlocal_spectrum
 from typing import Sequence, Tuple, List
 import datetime
-import itertools
 import csv
+import multiprocessing
 import logging
 import numpy
-import scipy.optimize
-import multiprocessing
 
 
 # TODO: remove hardcode
-COST_THRESHOLD = 1e-10
-
+CHUNKSIZE = 50
 
 # TODO: It's garbage to have this here duplicated from pdme.
 DotInput = Tuple[numpy.typing.ArrayLike, float]
@@ -20,10 +22,40 @@ DotInput = Tuple[numpy.typing.ArrayLike, float]
 _logger = logging.getLogger(__name__)
 
 
-def get_a_result(
-	discretisation, dots, index
-) -> Tuple[Tuple[int, ...], scipy.optimize.OptimizeResult]:
-	return (index, discretisation.solve_for_index(dots, index))
+def get_a_result(input) -> int:
+	model, dot_inputs, lows, highs, monte_carlo_count, max_frequency, seed = input
+
+	rng = numpy.random.default_rng(seed)
+	sample_dipoles = model.get_monte_carlo_dipole_inputs(
+		monte_carlo_count, max_frequency, rng_to_use=rng
+	)
+	vals = pdme.util.fast_v_calc.fast_vs_for_dipoleses(dot_inputs, sample_dipoles)
+	return numpy.count_nonzero(pdme.util.fast_v_calc.between(vals, lows, highs))
+
+
+def get_a_result_using_pairs(input) -> int:
+	(
+		model,
+		dot_inputs,
+		pair_inputs,
+		local_lows,
+		local_highs,
+		nonlocal_lows,
+		nonlocal_highs,
+		monte_carlo_count,
+		max_frequency,
+	) = input
+	sample_dipoles = model.get_n_single_dipoles(monte_carlo_count, max_frequency)
+	local_vals = pdme.util.fast_v_calc.fast_vs_for_dipoles(dot_inputs, sample_dipoles)
+	local_matches = pdme.util.fast_v_calc.between(local_vals, local_lows, local_highs)
+	nonlocal_vals = pdme.util.fast_nonlocal_spectrum.fast_s_nonlocal(
+		pair_inputs, sample_dipoles
+	)
+	nonlocal_matches = pdme.util.fast_v_calc.between(
+		nonlocal_vals, nonlocal_lows, nonlocal_highs
+	)
+	combined_matches = numpy.logical_and(local_matches, nonlocal_matches)
+	return numpy.count_nonzero(combined_matches)
 
 
 class BayesRun:
@@ -35,11 +67,11 @@ class BayesRun:
 	dot_inputs : Sequence[DotInput]
 		The dot inputs for this bayes run.
 
-	discretisations_with_names : Sequence[Tuple(str, pdme.model.Model)]
+	models_with_names : Sequence[Tuple(str, pdme.model.DipoleModel)]
 		The models to evaluate.
 
-	actual_model_discretisation : pdme.model.Discretisation
-		The discretisation for the model which is actually correct.
+	actual_model : pdme.model.DipoleModel
+		The model which is actually correct.
 
 	filename_slug : str
 		The filename slug to include.
@@ -50,29 +82,66 @@ class BayesRun:
 
 	def __init__(
 		self,
-		dot_inputs: Sequence[DotInput],
-		discretisations_with_names: Sequence[Tuple[str, pdme.model.Discretisation]],
-		actual_model: pdme.model.Model,
+		dot_positions: Sequence[numpy.typing.ArrayLike],
+		frequency_range: Sequence[float],
+		models_with_names: Sequence[Tuple[str, pdme.model.DipoleModel]],
+		actual_model: pdme.model.DipoleModel,
 		filename_slug: str,
-		run_count: int,
-		max_frequency: float = None,
+		run_count: int = 100,
+		low_error: float = 0.9,
+		high_error: float = 1.1,
+		monte_carlo_count: int = 10000,
+		monte_carlo_cycles: int = 10,
+		target_success: int = 100,
+		max_monte_carlo_cycles_steps: int = 10,
+		max_frequency: float = 20,
 		end_threshold: float = None,
+		chunksize: int = CHUNKSIZE,
 	) -> None:
-		self.dot_inputs = dot_inputs
-		self.discretisations = [disc for (_, disc) in discretisations_with_names]
-		self.model_names = [name for (name, _) in discretisations_with_names]
+		self.dot_inputs = pdme.inputs.inputs_with_frequency_range(
+			dot_positions, frequency_range
+		)
+		self.dot_inputs_array = pdme.measurement.input_types.dot_inputs_to_array(
+			self.dot_inputs
+		)
+
+		self.models = [model for (_, model) in models_with_names]
+		self.model_names = [name for (name, _) in models_with_names]
 		self.actual_model = actual_model
-		self.model_count = len(self.discretisations)
+
+		self.n: int
+		try:
+			self.n = self.actual_model.n  # type: ignore
+		except AttributeError:
+			self.n = 1
+
+		self.model_count = len(self.models)
+		self.monte_carlo_count = monte_carlo_count
+		self.monte_carlo_cycles = monte_carlo_cycles
+		self.target_success = target_success
+		self.max_monte_carlo_cycles_steps = max_monte_carlo_cycles_steps
 		self.run_count = run_count
-		self.csv_fields = ["dipole_moment", "dipole_location", "dipole_frequency"]
+		self.low_error = low_error
+		self.high_error = high_error
+
+		self.csv_fields = []
+		for i in range(self.n):
+			self.csv_fields.extend(
+				[
+					f"dipole_moment_{i+1}",
+					f"dipole_location_{i+1}",
+					f"dipole_frequency_{i+1}",
+				]
+			)
 		self.compensate_zeros = True
+		self.chunksize = chunksize
 		for name in self.model_names:
 			self.csv_fields.extend([f"{name}_success", f"{name}_count", f"{name}_prob"])
 
 		self.probabilities = [1 / self.model_count] * self.model_count
 
 		timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
-		self.filename = f"{timestamp}-{filename_slug}.csv"
+		self.filename = f"{timestamp}-{filename_slug}.bayesrun.csv"
 		self.max_frequency = max_frequency
 
 		if end_threshold is not None:
@@ -91,52 +160,95 @@ class BayesRun:
 			writer.writeheader()
 
 		for run in range(1, self.run_count + 1):
-			frequency: float = run
-			if self.max_frequency is not None and self.max_frequency > 1:
-				rng = numpy.random.default_rng()
-				frequency = rng.uniform(1, self.max_frequency)
-			dipoles = self.actual_model.get_dipoles(frequency)
-			dots = dipoles.get_dot_measurements(self.dot_inputs)
-			_logger.info(f"Going to work on dipole at {dipoles.dipoles}")
+			# Generate the actual dipoles
+			actual_dipoles = self.actual_model.get_dipoles(self.max_frequency)
+
+			dots = actual_dipoles.get_percent_range_dot_measurements(
+				self.dot_inputs, self.low_error, self.high_error
+			)
+			(
+				lows,
+				highs,
+			) = pdme.measurement.input_types.dot_range_measurements_low_high_arrays(
+				dots
+			)
+
+			_logger.info(f"Going to work on dipole at {actual_dipoles.dipoles}")
+
+			# define a new seed sequence for each run
+			seed_sequence = numpy.random.SeedSequence(run)
 
 			results = []
-			_logger.debug("Going to iterate over discretisations now")
-			for disc_count, discretisation in enumerate(self.discretisations):
-				_logger.debug(f"Doing discretisation #{disc_count}")
-				with multiprocessing.Pool(multiprocessing.cpu_count() - 1 or 1) as pool:
-					results.append(
-						pool.starmap(
-							get_a_result,
-							zip(
-								itertools.repeat(discretisation),
-								itertools.repeat(dots),
-								discretisation.all_indices(),
-							),
+			_logger.debug("Going to iterate over models now")
+			for model_count, model in enumerate(self.models):
+				_logger.debug(f"Doing model #{model_count}")
+				core_count = multiprocessing.cpu_count() - 1 or 1
+				with multiprocessing.Pool(core_count) as pool:
+					cycle_count = 0
+					cycle_success = 0
+					cycles = 0
+					while (cycles < self.max_monte_carlo_cycles_steps) and (
+						cycle_success <= self.target_success
+					):
+						_logger.debug(f"Starting cycle {cycles}")
+						cycles += 1
+						current_success = 0
+						cycle_count += self.monte_carlo_count * self.monte_carlo_cycles
+
+						# generate a seed from the sequence for each core.
+						# note this needs to be inside the loop for monte carlo cycle steps!
+						# that way we get more stuff.
+						seeds = seed_sequence.spawn(self.monte_carlo_cycles)
+
+						current_success = sum(
+							pool.imap_unordered(
+								get_a_result,
+								[
+									(
+										model,
+										self.dot_inputs_array,
+										lows,
+										highs,
+										self.monte_carlo_count,
+										self.max_frequency,
+										seed,
+									)
+									for seed in seeds
+								],
+								self.chunksize,
+							)
 						)
-					)
+
+						cycle_success += current_success
+						_logger.debug(f"current running successes: {cycle_success}")
+				results.append((cycle_count, cycle_success))
 
 			_logger.debug("Done, constructing output now")
 			row = {
-				"dipole_moment": dipoles.dipoles[0].p,
-				"dipole_location": dipoles.dipoles[0].s,
-				"dipole_frequency": dipoles.dipoles[0].w,
+				"dipole_moment_1": actual_dipoles.dipoles[0].p,
+				"dipole_location_1": actual_dipoles.dipoles[0].s,
+				"dipole_frequency_1": actual_dipoles.dipoles[0].w,
 			}
+			for i in range(1, self.n):
+				try:
+					current_dipoles = actual_dipoles.dipoles[i]
+					row[f"dipole_moment_{i+1}"] = current_dipoles.p
+					row[f"dipole_location_{i+1}"] = current_dipoles.s
+					row[f"dipole_frequency_{i+1}"] = current_dipoles.w
+				except IndexError:
+					_logger.info(f"Not writing anymore, saw end after {i}")
+					break
+
 			successes: List[float] = []
 			counts: List[int] = []
-			for model_index, (name, result) in enumerate(
+			for model_index, (name, (count, result)) in enumerate(
 				zip(self.model_names, results)
 			):
-				count = 0
-				success = 0
-				for idx, val in result:
-					count += 1
-					if val.success and val.cost <= COST_THRESHOLD:
-						success += 1
-				row[f"{name}_success"] = success
+				row[f"{name}_success"] = result
 				row[f"{name}_count"] = count
-				successes.append(max(success, 0.5))
+				successes.append(max(result, 0.5))
 				counts.append(count)
 
 			success_weight = sum(
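A notable change in the rewritten BayesRun.go above is the seeding: each run builds a numpy.random.SeedSequence keyed on the run number and spawns one child seed per Monte Carlo cycle, so every pool worker gets an independent but reproducible stream. A stand-alone sketch of that pattern (variable names here are illustrative, not the module's):

    import numpy

    run = 1
    monte_carlo_cycles = 10

    # One sequence per run, spawned into one child seed per cycle,
    # mirroring seed_sequence.spawn(self.monte_carlo_cycles) above.
    seed_sequence = numpy.random.SeedSequence(run)
    seeds = seed_sequence.spawn(monte_carlo_cycles)

    # Each worker builds its own generator from its child seed.
    rngs = [numpy.random.default_rng(seed) for seed in seeds]
    print([int(r.integers(0, 100)) for r in rngs])  # distinct, reproducible draws

Spawning inside the cycle-step loop, as the comment in the diff notes, keeps later cycle steps from reusing the same child seeds.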
diff --git a/deepdog/alt_bayes_run_simulpairs.py b/deepdog/bayes_run_simulpairs.py
similarity index 80%
rename from deepdog/alt_bayes_run_simulpairs.py
rename to deepdog/bayes_run_simulpairs.py
index 34a6cf5..cffc9cd 100644
--- a/deepdog/alt_bayes_run_simulpairs.py
+++ b/deepdog/bayes_run_simulpairs.py
@@ -25,7 +25,7 @@ _logger = logging.getLogger(__name__)
 
 def get_a_simul_result_using_pairs(input) -> numpy.ndarray:
 	(
-		discretisation,
+		model,
 		dot_inputs,
 		pair_inputs,
 		local_lows,
@@ -42,16 +42,12 @@ def get_a_simul_result_using_pairs(input) -> numpy.ndarray:
 	local_total = 0
 	combined_total = 0
 
-	sample_dipoles = discretisation.get_model().get_n_single_dipoles(
+	sample_dipoles = model.get_monte_carlo_dipole_inputs(
 		monte_carlo_count, max_frequency, rng_to_use=rng
 	)
-	local_vals = pdme.util.fast_v_calc.fast_vs_for_dipoles(
-		dot_inputs, sample_dipoles
-	)
-	local_matches = pdme.util.fast_v_calc.between(
-		local_vals, local_lows, local_highs
-	)
-	nonlocal_vals = pdme.util.fast_nonlocal_spectrum.fast_s_nonlocal(
+	local_vals = pdme.util.fast_v_calc.fast_vs_for_dipoleses(dot_inputs, sample_dipoles)
+	local_matches = pdme.util.fast_v_calc.between(local_vals, local_lows, local_highs)
+	nonlocal_vals = pdme.util.fast_nonlocal_spectrum.fast_s_nonlocal_dipoleses(
 		pair_inputs, sample_dipoles
 	)
 	nonlocal_matches = pdme.util.fast_v_calc.between(
@@ -64,7 +60,7 @@ def get_a_simul_result_using_pairs(input) -> numpy.ndarray:
 	return numpy.array([local_total, combined_total])
 
 
-class AltBayesRunSimulPairs:
+class BayesRunSimulPairs:
 	"""
 	A dual pairs-nonpairs Bayes run for a given set of dots.
 
@@ -73,11 +69,11 @@ class AltBayesRunSimulPairs:
 	dot_inputs : Sequence[DotInput]
 		The dot inputs for this bayes run.
 
-	discretisations_with_names : Sequence[Tuple(str, pdme.model.Model)]
+	models_with_names : Sequence[Tuple(str, pdme.model.DipoleModel)]
 		The models to evaluate.
 
-	actual_model_discretisation : pdme.model.Discretisation
-		The discretisation for the model which is actually correct.
+	actual_model : pdme.model.DipoleModel
+		The model which is actually correct.
 
 	filename_slug : str
 		The filename slug to include.
@@ -90,8 +86,8 @@ class AltBayesRunSimulPairs:
 		self,
 		dot_positions: Sequence[numpy.typing.ArrayLike],
 		frequency_range: Sequence[float],
-		discretisations_with_names: Sequence[Tuple[str, pdme.model.Discretisation]],
-		actual_model: pdme.model.Model,
+		models_with_names: Sequence[Tuple[str, pdme.model.DipoleModel]],
+		actual_model: pdme.model.DipoleModel,
 		filename_slug: str,
 		run_count: int = 100,
 		low_error: float = 0.9,
@@ -120,10 +116,17 @@ class AltBayesRunSimulPairs:
 			pdme.measurement.input_types.dot_pair_inputs_to_array(self.dot_pair_inputs)
 		)
 
-		self.discretisations = [disc for (_, disc) in discretisations_with_names]
-		self.model_names = [name for (name, _) in discretisations_with_names]
+		self.models = [mod for (_, mod) in models_with_names]
+		self.model_names = [name for (name, _) in models_with_names]
 		self.actual_model = actual_model
-		self.model_count = len(self.discretisations)
+
+		self.n: int
+		try:
+			self.n = self.actual_model.n  # type: ignore
+		except AttributeError:
+			self.n = 1
+
+		self.model_count = len(self.models)
 		self.monte_carlo_count = monte_carlo_count
 		self.monte_carlo_cycles = monte_carlo_cycles
 		self.target_success = target_success
@@ -139,7 +142,16 @@ class AltBayesRunSimulPairs:
 			self.pairs_high_error = self.high_error
 		else:
 			self.pairs_high_error = pairs_high_error
-		self.csv_fields = ["dipole_moment", "dipole_location", "dipole_frequency"]
+
+		self.csv_fields = []
+		for i in range(self.n):
+			self.csv_fields.extend(
+				[
+					f"dipole_moment_{i+1}",
+					f"dipole_location_{i+1}",
+					f"dipole_frequency_{i+1}",
+				]
+			)
 		self.compensate_zeros = True
 		self.chunksize = chunksize
 		for name in self.model_names:
@@ -174,11 +186,8 @@ class AltBayesRunSimulPairs:
 
 		for run in range(1, self.run_count + 1):
 
-			rng = numpy.random.default_rng()
-			frequency = rng.uniform(1, self.max_frequency)
-
 			# Generate the actual dipoles
-			actual_dipoles = self.actual_model.get_dipoles(frequency)
+			actual_dipoles = self.actual_model.get_dipoles(self.max_frequency)
 
 			dots = actual_dipoles.get_percent_range_dot_measurements(
 				self.dot_inputs, self.low_error, self.high_error
@@ -208,9 +217,9 @@ class AltBayesRunSimulPairs:
 			results_pairs = []
 			results_no_pairs = []
 
-			_logger.debug("Going to iterate over discretisations now")
-			for disc_count, discretisation in enumerate(self.discretisations):
-				_logger.debug(f"Doing discretisation #{disc_count}")
+			_logger.debug("Going to iterate over models now")
+			for model_count, model in enumerate(self.models):
+				_logger.debug(f"Doing model #{model_count}")
 				core_count = multiprocessing.cpu_count() - 1 or 1
 				with multiprocessing.Pool(core_count) as pool:
@@ -223,7 +232,7 @@ class AltBayesRunSimulPairs:
 						<= self.target_success
 					):
 						_logger.debug(f"Starting cycle {cycles}")
-						_logger.debug(f"(pair, no_pair) successes are {(cycle_success_pairs, cycle_success_no_pairs)}")
+
 						cycles += 1
 						current_success_pairs = 0
 						current_success_no_pairs = 0
@@ -241,7 +250,7 @@ class AltBayesRunSimulPairs:
 								get_a_simul_result_using_pairs,
 								[
 									(
-										discretisation,
+										model,
 										self.dot_inputs_array,
 										self.dot_pair_inputs_array,
 										lows,
@@ -264,20 +273,36 @@ class AltBayesRunSimulPairs:
 					cycle_success_no_pairs += current_success_no_pairs
 					cycle_success_pairs += current_success_pairs
+					_logger.debug(
+						f"(pair, no_pair) successes are {(cycle_success_pairs, cycle_success_no_pairs)}"
+					)
 				results_pairs.append((cycle_count, cycle_success_pairs))
 				results_no_pairs.append((cycle_count, cycle_success_no_pairs))
 
 			_logger.debug("Done, constructing output now")
 			row_pairs = {
-				"dipole_moment": actual_dipoles.dipoles[0].p,
-				"dipole_location": actual_dipoles.dipoles[0].s,
-				"dipole_frequency": actual_dipoles.dipoles[0].w,
+				"dipole_moment_1": actual_dipoles.dipoles[0].p,
+				"dipole_location_1": actual_dipoles.dipoles[0].s,
+				"dipole_frequency_1": actual_dipoles.dipoles[0].w,
 			}
 			row_no_pairs = {
-				"dipole_moment": actual_dipoles.dipoles[0].p,
-				"dipole_location": actual_dipoles.dipoles[0].s,
-				"dipole_frequency": actual_dipoles.dipoles[0].w,
+				"dipole_moment_1": actual_dipoles.dipoles[0].p,
+				"dipole_location_1": actual_dipoles.dipoles[0].s,
+				"dipole_frequency_1": actual_dipoles.dipoles[0].w,
 			}
+			for i in range(1, self.n):
+				try:
+					current_dipoles = actual_dipoles.dipoles[i]
+					row_pairs[f"dipole_moment_{i+1}"] = current_dipoles.p
+					row_pairs[f"dipole_location_{i+1}"] = current_dipoles.s
+					row_pairs[f"dipole_frequency_{i+1}"] = current_dipoles.w
+					row_no_pairs[f"dipole_moment_{i+1}"] = current_dipoles.p
+					row_no_pairs[f"dipole_location_{i+1}"] = current_dipoles.s
+					row_no_pairs[f"dipole_frequency_{i+1}"] = current_dipoles.w
+				except IndexError:
+					_logger.info(f"Not writing anymore, saw end after {i}")
+					break
+
 			successes_pairs: List[float] = []
 			successes_no_pairs: List[float] = []
 			counts: List[int] = []
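For orientation, the renamed get_a_simul_result_using_pairs returns a two-element tally per batch: how many sampled dipole configurations match the local (single-dot) bands, and how many of those also match the nonlocal (dot-pair) bands. A toy numpy sketch of that bookkeeping, with random booleans standing in for the pdme match computations:

    import numpy

    rng = numpy.random.default_rng(0)
    monte_carlo_count = 1000

    # Stand-ins for pdme.util.fast_v_calc.between applied to local and nonlocal spectra.
    local_matches = rng.uniform(size=monte_carlo_count) < 0.3
    nonlocal_matches = rng.uniform(size=monte_carlo_count) < 0.5

    combined_matches = numpy.logical_and(local_matches, nonlocal_matches)
    local_total = numpy.count_nonzero(local_matches)
    combined_total = numpy.count_nonzero(combined_matches)
    print(numpy.array([local_total, combined_total]))  # cf. the function's return value

The two totals let BayesRunSimulPairs update its pairs and no-pairs probabilities from a single set of samples.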
diff --git a/deepdog/diagnostic.py b/deepdog/diagnostic.py
deleted file mode 100644
index 4dfe12d..0000000
--- a/deepdog/diagnostic.py
+++ /dev/null
@@ -1,160 +0,0 @@
-from pdme.measurement import OscillatingDipole, OscillatingDipoleArrangement
-import pdme
-from deepdog.bayes_run import DotInput
-import datetime
-import numpy
-from dataclasses import dataclass
-import logging
-from typing import Sequence, Tuple
-import csv
-import itertools
-import multiprocessing
-
-_logger = logging.getLogger(__name__)
-
-
-def get_a_result(discretisation, dots, index):
-	return (index, discretisation.solve_for_index(dots, index))
-
-
-@dataclass
-class SingleDipoleDiagnostic:
-	model: str
-	index: Tuple
-	bounds: Tuple
-	actual_dipole: OscillatingDipole
-	result_dipole: OscillatingDipole
-	success: bool
-
-	def __post_init__(self) -> None:
-		self.p_actual_x = self.actual_dipole.p[0]
-		self.p_actual_y = self.actual_dipole.p[1]
-		self.p_actual_z = self.actual_dipole.p[2]
-		self.s_actual_x = self.actual_dipole.s[0]
-		self.s_actual_y = self.actual_dipole.s[1]
-		self.s_actual_z = self.actual_dipole.s[2]
-		self.p_result_x = self.result_dipole.p[0]
-		self.p_result_y = self.result_dipole.p[1]
-		self.p_result_z = self.result_dipole.p[2]
-		self.s_result_x = self.result_dipole.s[0]
-		self.s_result_y = self.result_dipole.s[1]
-		self.s_result_z = self.result_dipole.s[2]
-		self.w_actual = self.actual_dipole.w
-		self.w_result = self.result_dipole.w
-
-
-class Diagnostic:
-	"""
-	Represents a diagnostic for a single dipole moment given a set of discretisations.
-
-	Parameters
-	----------
-	dot_inputs : Sequence[DotInput]
-		The dot inputs for this diagnostic.
-
-	discretisations_with_names : Sequence[Tuple(str, pdme.model.Model)]
-		The models to evaluate.
-
-	actual_model_discretisation : pdme.model.Discretisation
-		The discretisation for the model which is actually correct.
-
-	filename_slug : str
-		The filename slug to include.
-
-	run_count: int
-		The number of runs to do.
-	"""
-
-	def __init__(
-		self,
-		actual_dipole_moment: numpy.ndarray,
-		actual_dipole_position: numpy.ndarray,
-		actual_dipole_frequency: float,
-		dot_inputs: Sequence[DotInput],
-		discretisations_with_names: Sequence[Tuple[str, pdme.model.Discretisation]],
-		filename_slug: str,
-	) -> None:
-		self.dipoles = OscillatingDipoleArrangement(
-			[
-				OscillatingDipole(
-					actual_dipole_moment,
-					actual_dipole_position,
-					actual_dipole_frequency,
-				)
-			]
-		)
-		self.dots = self.dipoles.get_dot_measurements(dot_inputs)
-
-		self.discretisations_with_names = discretisations_with_names
-		self.model_count = len(self.discretisations_with_names)
-
-		self.csv_fields = [
-			"model",
-			"index",
-			"bounds",
-			"p_actual_x",
-			"p_actual_y",
-			"p_actual_z",
-			"s_actual_x",
-			"s_actual_y",
-			"s_actual_z",
-			"w_actual",
-			"success",
-			"p_result_x",
-			"p_result_y",
-			"p_result_z",
-			"s_result_x",
-			"s_result_y",
-			"s_result_z",
-			"w_result",
-		]
-
-		timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
-		self.filename = f"{timestamp}-{filename_slug}.diag.csv"
-
-	def go(self):
-		with open(self.filename, "a", newline="") as outfile:
-			# csv fields
-			writer = csv.DictWriter(outfile, fieldnames=self.csv_fields, dialect="unix")
-			writer.writeheader()
-
-		for (name, discretisation) in self.discretisations_with_names:
-			_logger.info(f"Working on discretisation {name}")
-
-			results = []
-			with multiprocessing.Pool(multiprocessing.cpu_count() - 1 or 1) as pool:
-				results = pool.starmap(
-					get_a_result,
-					zip(
-						itertools.repeat(discretisation),
-						itertools.repeat(self.dots),
-						discretisation.all_indices(),
-					),
-				)
-
-			with open(self.filename, "a", newline="") as outfile:
-				writer = csv.DictWriter(
-					outfile,
-					fieldnames=self.csv_fields,
-					dialect="unix",
-					extrasaction="ignore",
-				)
-
-				for idx, result in results:
-
-					bounds = discretisation.bounds(idx)
-
-					actual_success = result.success and result.cost <= 1e-10
-					diag_row = SingleDipoleDiagnostic(
-						name,
-						idx,
-						bounds,
-						self.dipoles.dipoles[0],
-						discretisation.model.solution_as_dipoles(result.normalised_x)[
-							0
-						],
-						actual_success,
-					)
-					row = vars(diag_row)
-					_logger.debug(f"Writing result {row}")
-					writer.writerow(row)
diff --git a/poetry.lock b/poetry.lock
index 6b98185..b7c6816 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -335,7 +335,7 @@ python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
 
 [[package]]
 name = "pdme"
-version = "0.6.2"
+version = "0.8.2"
 description = "Python dipole model evaluator"
 category = "main"
 optional = false
@@ -740,7 +740,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-
 
 [metadata]
 lock-version = "1.1"
 python-versions = "^3.8,<3.10"
-content-hash = "98877f53c0ca996cd6eaa2c3b7391e391d29c7a4d3f1e08159fc999a3e4ad296"
+content-hash = "c1b27cf9297e4777e78a72caba5f560be87a0f3b27c001b7a05b18adfdca2d1c"
 
 [metadata.files]
 atomicwrites = [
@@ -1025,8 +1025,8 @@ pathspec = [
     {file = "pathspec-0.9.0.tar.gz", hash = "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"},
 ]
 pdme = [
-    {file = "pdme-0.6.2-py3-none-any.whl", hash = "sha256:7e81081be243006f86c31d3590a77a529764204b3831b83a939a87025d463e26"},
-    {file = "pdme-0.6.2.tar.gz", hash = "sha256:59c2a3249338317f22cf268c55c90d06b563d42a9278e2826753f6d491379f67"},
+    {file = "pdme-0.8.2-py3-none-any.whl", hash = "sha256:872aa43f99d357b8e802036c6de2e529942dd24c76a830142abd0b9ff6fd869c"},
+    {file = "pdme-0.8.2.tar.gz", hash = "sha256:36a2bef50ab2c24a82c44bd9ded473991332358ab393f46e9e0bb0b83d538a0f"},
 ]
 pkginfo = [
     {file = "pkginfo-1.8.2-py2.py3-none-any.whl", hash = "sha256:c24c487c6a7f72c66e816ab1796b96ac6c3d14d49338293d2141664330b55ffc"},
diff --git a/pyproject.toml b/pyproject.toml
index 005a195..3567e5d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ authors = ["Deepak Mallubhotla "]
 
 [tool.poetry.dependencies]
 python = "^3.8,<3.10"
-pdme = "0.6.2"
+pdme = "0.8.2"
 
 [tool.poetry.dev-dependencies]
 pytest = ">=6"
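Finally, the probability bookkeeping shared by the remaining classes (shown in full in the AltBayesRun code removed above, and kept as context in the surviving files) is a plain Bayes-factor reweighting: each model's success rate multiplies its prior, and the results are renormalised by success_weight. A pure-Python sketch of one update step with made-up counts; successes are floored at 0.5 exactly as in the classes:

    model_names = ["model_a", "model_b"]  # illustrative names only
    probabilities = [0.5, 0.5]            # uniform prior over models
    successes = [120.0, 0.5]              # matched samples per model (floored at 0.5)
    counts = [100000, 100000]             # total samples drawn per model

    success_weight = sum(
        (succ / count) * prob
        for succ, count, prob in zip(successes, counts, probabilities)
    )
    probabilities = [
        (succ / count) * old_prob / success_weight
        for succ, count, old_prob in zip(successes, counts, probabilities)
    ]
    print(dict(zip(model_names, probabilities)))  # model_a dominates after this run

When any model's posterior exceeds end_threshold, go() stops early, matching the end-of-run check in both classes.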