From a089951bbefcd8a0b2efeb49b7a8090412cbb23d Mon Sep 17 00:00:00 2001 From: Deepak Mallubhotla Date: Mon, 1 May 2023 10:05:46 -0500 Subject: [PATCH 1/2] feat: adds pair capability to real spectrum run hopefully --- deepdog/real_spectrum_run.py | 138 +++++++++++++++++++++++++++++------ 1 file changed, 117 insertions(+), 21 deletions(-) diff --git a/deepdog/real_spectrum_run.py b/deepdog/real_spectrum_run.py index 5b682dd..d2ed746 100644 --- a/deepdog/real_spectrum_run.py +++ b/deepdog/real_spectrum_run.py @@ -5,7 +5,7 @@ import pdme.measurement.input_types import pdme.measurement.oscillating_dipole import pdme.util.fast_v_calc import pdme.util.fast_nonlocal_spectrum -from typing import Sequence, Tuple, List, Dict, Union +from typing import Sequence, Tuple, List, Dict, Union, Optional import datetime import csv import multiprocessing @@ -32,6 +32,52 @@ def get_a_result(input) -> int: return numpy.count_nonzero(pdme.util.fast_v_calc.between(vals, lows, highs)) +def get_a_result_fast_filter_pairs(input) -> int: + ( + model, + dot_inputs, + lows, + highs, + pair_inputs, + pair_lows, + pair_highs, + monte_carlo_count, + seed, + ) = input + + rng = numpy.random.default_rng(seed) + # TODO: A long term refactor is to pull the frequency stuff out from here. The None stands for max_frequency, which is unneeded in the actually useful models. + sample_dipoles = model.get_monte_carlo_dipole_inputs( + monte_carlo_count, None, rng_to_use=rng + ) + + current_sample = sample_dipoles + for di, low, high in zip(dot_inputs, lows, highs): + + if len(current_sample) < 1: + break + vals = pdme.util.fast_v_calc.fast_vs_for_dipoleses( + numpy.array([di]), current_sample + ) + + current_sample = current_sample[numpy.all((vals > low) & (vals < high), axis=1)] + + for pi, plow, phigh in zip(pair_inputs, pair_lows, pair_highs): + if len(current_sample) < 1: + break + vals = pdme.util.fast_nonlocal_spectrum.fast_s_nonlocal_dipoleses( + numpy.array([pi]), current_sample + ) + + current_sample = current_sample[ + numpy.all( + ((vals > plow) & (vals < phigh)) | ((vals < plow) & (vals > phigh)), + axis=1, + ) + ] + return len(current_sample) + + def get_a_result_fast_filter(input) -> int: model, dot_inputs, lows, highs, monte_carlo_count, seed = input @@ -89,6 +135,9 @@ class RealSpectrumRun: initial_seed: int = 12345, use_fast_filter: bool = True, cap_core_count: int = 0, + pair_measurements: Optional[ + Sequence[pdme.measurement.DotPairRangeMeasurement] + ] = None, ) -> None: self.measurements = measurements self.dot_inputs = [(measure.r, measure.f) for measure in self.measurements] @@ -97,6 +146,21 @@ class RealSpectrumRun: self.dot_inputs ) + if pair_measurements is not None: + self.pair_measurements = pair_measurements + self.use_pair_measurements = True + self.dot_pair_inputs = [ + (measure.r1, measure.r2, measure.f) + for measure in self.pair_measurements + ] + self.dot_pair_inputs_array = ( + pdme.measurement.input_types.dot_pair_inputs_to_array( + self.dot_pair_inputs + ) + ) + else: + self.use_pair_measurements = False + self.models = [model for (_, model) in models_with_names] self.model_names = [name for (name, _) in models_with_names] self.model_count = len(self.models) @@ -138,6 +202,16 @@ class RealSpectrumRun: self.measurements ) + pair_lows = None + pair_highs = None + if self.use_pair_measurements: + ( + pair_lows, + pair_highs, + ) = pdme.measurement.input_types.dot_range_measurements_low_high_arrays( + self.pair_measurements + ) + # define a new seed sequence for each run seed_sequence = numpy.random.SeedSequence(self.initial_seed) @@ -168,27 +242,49 @@ class RealSpectrumRun: # that way we get more stuff. seeds = seed_sequence.spawn(self.monte_carlo_cycles) - if self.use_fast_filter: - result_func = get_a_result_fast_filter - else: - result_func = get_a_result - current_success = sum( - pool.imap_unordered( - result_func, - [ - ( - model, - self.dot_inputs_array, - lows, - highs, - self.monte_carlo_count, - seed, - ) - for seed in seeds - ], - self.chunksize, + if self.use_pair_measurements: + current_success = sum( + pool.imap_unordered( + get_a_result_fast_filter_pairs, + [ + ( + model, + self.dot_inputs_array, + lows, + highs, + self.dot_pair_inputs_array, + pair_lows, + pair_highs, + self.monte_carlo_count, + seed, + ) + for seed in seeds + ], + self.chunksize, + ) + ) + else: + if self.use_fast_filter: + result_func = get_a_result_fast_filter + else: + result_func = get_a_result + current_success = sum( + pool.imap_unordered( + result_func, + [ + ( + model, + self.dot_inputs_array, + lows, + highs, + self.monte_carlo_count, + seed, + ) + for seed in seeds + ], + self.chunksize, + ) ) - ) cycle_success += current_success _logger.debug(f"current running successes: {cycle_success}") -- 2.49.1 From a015daf5ff6fa5f6155c8d7e02981b588840a5b0 Mon Sep 17 00:00:00 2001 From: Deepak Mallubhotla Date: Mon, 1 May 2023 10:17:12 -0500 Subject: [PATCH 2/2] feat!: removes fastfilter parameter because it should never be needed --- deepdog/real_spectrum_run.py | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/deepdog/real_spectrum_run.py b/deepdog/real_spectrum_run.py index d2ed746..24d30fa 100644 --- a/deepdog/real_spectrum_run.py +++ b/deepdog/real_spectrum_run.py @@ -20,18 +20,6 @@ CHUNKSIZE = 50 _logger = logging.getLogger(__name__) -def get_a_result(input) -> int: - model, dot_inputs, lows, highs, monte_carlo_count, seed = input - - rng = numpy.random.default_rng(seed) - # TODO: A long term refactor is to pull the frequency stuff out from here. The None stands for max_frequency, which is unneeded in the actually useful models. - sample_dipoles = model.get_monte_carlo_dipole_inputs( - monte_carlo_count, None, rng_to_use=rng - ) - vals = pdme.util.fast_v_calc.fast_vs_for_dipoleses(dot_inputs, sample_dipoles) - return numpy.count_nonzero(pdme.util.fast_v_calc.between(vals, lows, highs)) - - def get_a_result_fast_filter_pairs(input) -> int: ( model, @@ -133,7 +121,6 @@ class RealSpectrumRun: max_monte_carlo_cycles_steps: int = 10, chunksize: int = CHUNKSIZE, initial_seed: int = 12345, - use_fast_filter: bool = True, cap_core_count: int = 0, pair_measurements: Optional[ Sequence[pdme.measurement.DotPairRangeMeasurement] @@ -181,10 +168,9 @@ class RealSpectrumRun: self.probabilities = [1 / self.model_count] * self.model_count timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") - self.use_fast_filter = use_fast_filter - ff_string = "no_fast_filter" - if self.use_fast_filter: - ff_string = "fast_filter" + + ff_string = "fast_filter" + self.filename = f"{timestamp}-{filename_slug}.realdata.{ff_string}.bayesrun.csv" self.initial_seed = initial_seed @@ -264,13 +250,10 @@ class RealSpectrumRun: ) ) else: - if self.use_fast_filter: - result_func = get_a_result_fast_filter - else: - result_func = get_a_result + current_success = sum( pool.imap_unordered( - result_func, + get_a_result_fast_filter, [ ( model, -- 2.49.1