diff --git a/deepdog/real_spectrum_run.py b/deepdog/real_spectrum_run.py index 5b682dd..24d30fa 100644 --- a/deepdog/real_spectrum_run.py +++ b/deepdog/real_spectrum_run.py @@ -5,7 +5,7 @@ import pdme.measurement.input_types import pdme.measurement.oscillating_dipole import pdme.util.fast_v_calc import pdme.util.fast_nonlocal_spectrum -from typing import Sequence, Tuple, List, Dict, Union +from typing import Sequence, Tuple, List, Dict, Union, Optional import datetime import csv import multiprocessing @@ -20,16 +20,50 @@ CHUNKSIZE = 50 _logger = logging.getLogger(__name__) -def get_a_result(input) -> int: - model, dot_inputs, lows, highs, monte_carlo_count, seed = input +def get_a_result_fast_filter_pairs(input) -> int: + ( + model, + dot_inputs, + lows, + highs, + pair_inputs, + pair_lows, + pair_highs, + monte_carlo_count, + seed, + ) = input rng = numpy.random.default_rng(seed) # TODO: A long term refactor is to pull the frequency stuff out from here. The None stands for max_frequency, which is unneeded in the actually useful models. sample_dipoles = model.get_monte_carlo_dipole_inputs( monte_carlo_count, None, rng_to_use=rng ) - vals = pdme.util.fast_v_calc.fast_vs_for_dipoleses(dot_inputs, sample_dipoles) - return numpy.count_nonzero(pdme.util.fast_v_calc.between(vals, lows, highs)) + + current_sample = sample_dipoles + for di, low, high in zip(dot_inputs, lows, highs): + + if len(current_sample) < 1: + break + vals = pdme.util.fast_v_calc.fast_vs_for_dipoleses( + numpy.array([di]), current_sample + ) + + current_sample = current_sample[numpy.all((vals > low) & (vals < high), axis=1)] + + for pi, plow, phigh in zip(pair_inputs, pair_lows, pair_highs): + if len(current_sample) < 1: + break + vals = pdme.util.fast_nonlocal_spectrum.fast_s_nonlocal_dipoleses( + numpy.array([pi]), current_sample + ) + + current_sample = current_sample[ + numpy.all( + ((vals > plow) & (vals < phigh)) | ((vals < plow) & (vals > phigh)), + axis=1, + ) + ] + return len(current_sample) def get_a_result_fast_filter(input) -> int: @@ -87,8 +121,10 @@ class RealSpectrumRun: max_monte_carlo_cycles_steps: int = 10, chunksize: int = CHUNKSIZE, initial_seed: int = 12345, - use_fast_filter: bool = True, cap_core_count: int = 0, + pair_measurements: Optional[ + Sequence[pdme.measurement.DotPairRangeMeasurement] + ] = None, ) -> None: self.measurements = measurements self.dot_inputs = [(measure.r, measure.f) for measure in self.measurements] @@ -97,6 +133,21 @@ class RealSpectrumRun: self.dot_inputs ) + if pair_measurements is not None: + self.pair_measurements = pair_measurements + self.use_pair_measurements = True + self.dot_pair_inputs = [ + (measure.r1, measure.r2, measure.f) + for measure in self.pair_measurements + ] + self.dot_pair_inputs_array = ( + pdme.measurement.input_types.dot_pair_inputs_to_array( + self.dot_pair_inputs + ) + ) + else: + self.use_pair_measurements = False + self.models = [model for (_, model) in models_with_names] self.model_names = [name for (name, _) in models_with_names] self.model_count = len(self.models) @@ -117,10 +168,9 @@ class RealSpectrumRun: self.probabilities = [1 / self.model_count] * self.model_count timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") - self.use_fast_filter = use_fast_filter - ff_string = "no_fast_filter" - if self.use_fast_filter: - ff_string = "fast_filter" + + ff_string = "fast_filter" + self.filename = f"{timestamp}-{filename_slug}.realdata.{ff_string}.bayesrun.csv" self.initial_seed = initial_seed @@ -138,6 +188,16 @@ class RealSpectrumRun: self.measurements ) + pair_lows = None + pair_highs = None + if self.use_pair_measurements: + ( + pair_lows, + pair_highs, + ) = pdme.measurement.input_types.dot_range_measurements_low_high_arrays( + self.pair_measurements + ) + # define a new seed sequence for each run seed_sequence = numpy.random.SeedSequence(self.initial_seed) @@ -168,27 +228,46 @@ class RealSpectrumRun: # that way we get more stuff. seeds = seed_sequence.spawn(self.monte_carlo_cycles) - if self.use_fast_filter: - result_func = get_a_result_fast_filter - else: - result_func = get_a_result - current_success = sum( - pool.imap_unordered( - result_func, - [ - ( - model, - self.dot_inputs_array, - lows, - highs, - self.monte_carlo_count, - seed, - ) - for seed in seeds - ], - self.chunksize, + if self.use_pair_measurements: + current_success = sum( + pool.imap_unordered( + get_a_result_fast_filter_pairs, + [ + ( + model, + self.dot_inputs_array, + lows, + highs, + self.dot_pair_inputs_array, + pair_lows, + pair_highs, + self.monte_carlo_count, + seed, + ) + for seed in seeds + ], + self.chunksize, + ) + ) + else: + + current_success = sum( + pool.imap_unordered( + get_a_result_fast_filter, + [ + ( + model, + self.dot_inputs_array, + lows, + highs, + self.monte_carlo_count, + seed, + ) + for seed in seeds + ], + self.chunksize, + ) ) - ) cycle_success += current_success _logger.debug(f"current running successes: {cycle_success}")