From f2b1a1dd3b3436e37d84f7843b9b2a202be4b51c Mon Sep 17 00:00:00 2001 From: Deepak Mallubhotla Date: Wed, 1 May 2024 15:22:59 -0500 Subject: [PATCH] feat: Adds more powerful direct mc runs to sub for old real spectrum run --- deepdog/direct_monte_carlo/direct_mc.py | 243 +++++++++++++++++----- deepdog/direct_monte_carlo/dmc_filters.py | 192 +++++++++++------ deepdog/real_spectrum_run.py | 55 +---- poetry.lock | 7 +- 4 files changed, 330 insertions(+), 167 deletions(-) diff --git a/deepdog/direct_monte_carlo/direct_mc.py b/deepdog/direct_monte_carlo/direct_mc.py index 1d1ed48..50ada69 100644 --- a/deepdog/direct_monte_carlo/direct_mc.py +++ b/deepdog/direct_monte_carlo/direct_mc.py @@ -1,13 +1,16 @@ +import csv import pdme.model import pdme.measurement import pdme.measurement.input_types import pdme.subspace_simulation -from typing import Tuple, Dict, NewType, Any +import datetime +from typing import Tuple, Dict, NewType, Any, Sequence from dataclasses import dataclass import logging import numpy import numpy.random import pdme.util.fast_v_calc +import multiprocessing _logger = logging.getLogger(__name__) @@ -17,6 +20,7 @@ class DirectMonteCarloResult: successes: int monte_carlo_count: int likelihood: float + model_name: str @dataclass @@ -28,6 +32,10 @@ class DirectMonteCarloConfig: monte_carlo_seed: int = 1234 write_successes_to_file: bool = False tag: str = "" + cap_core_count: int = 0 # 0 means cap at num cores - 1 + chunk_size: int = 50 + write_bayesrun_file = True + # chunk size of some kind # Aliasing dict as a generic data container @@ -51,8 +59,8 @@ class DirectMonteCarloRun: Parameters ---------- - model_name_pair : Sequence[Tuple(str, pdme.model.DipoleModel)] - The model to evaluate, with name. + model_name_pairs : Sequence[Tuple(str, pdme.model.DipoleModel)] + The models to evaluate, with names measurements: Sequence[pdme.measurement.DotRangeMeasurement] The measurements as dot ranges to use as the bounds for the Monte Carlo calculation. @@ -78,11 +86,11 @@ class DirectMonteCarloRun: def __init__( self, - model_name_pair: Tuple[str, pdme.model.DipoleModel], + model_name_pairs: Sequence[Tuple[str, pdme.model.DipoleModel]], filter: DirectMonteCarloFilter, config: DirectMonteCarloConfig, ): - self.model_name, self.model = model_name_pair + self.model_name_pairs = model_name_pairs # self.measurements = measurements # self.dot_inputs = [(measure.r, measure.f) for measure in self.measurements] @@ -100,10 +108,16 @@ class DirectMonteCarloRun: # self.measurements # ) - def _single_run(self, seed) -> numpy.ndarray: + def _single_run( + self, model_name_pair: Tuple[str, pdme.model.DipoleModel], seed + ) -> numpy.ndarray: rng = numpy.random.default_rng(seed) - sample_dipoles = self.model.get_monte_carlo_dipole_inputs( + _, model = model_name_pair + # don't log here it's madness + # _logger.info(f"Executing for model {model_name}") + + sample_dipoles = model.get_monte_carlo_dipole_inputs( self.config.monte_carlo_count_per_cycle, -1, rng ) @@ -123,52 +137,183 @@ class DirectMonteCarloRun: # ] # return current_sample - def execute(self) -> DirectMonteCarloResult: - step_count = 0 - total_success = 0 - total_count = 0 + def _wrapped_single_run(self, args: Tuple): + """ + single run wrapped up for multiprocessing call. + + takes in a tuple of arguments corresponding to + (model_name_pair, seed) + """ + # here's where we do our work + + model_name_pair, seed = args + cycle_success_configs = self._single_run(model_name_pair, seed) + cycle_success_count = len(cycle_success_configs) + + return cycle_success_count + + def execute_no_multiprocessing(self) -> Sequence[DirectMonteCarloResult]: count_per_step = ( self.config.monte_carlo_count_per_cycle * self.config.monte_carlo_cycles ) seed_sequence = numpy.random.SeedSequence(self.config.monte_carlo_seed) - while (step_count < self.config.max_monte_carlo_cycles_steps) and ( - total_success < self.config.target_success - ): - _logger.debug(f"Executing step {step_count}") - for cycle_i, seed in enumerate( - seed_sequence.spawn(self.config.monte_carlo_cycles) - ): - cycle_success_configs = self._single_run(seed) - cycle_success_count = len(cycle_success_configs) - if cycle_success_count > 0: - _logger.debug( - f"For cycle {cycle_i} received {cycle_success_count} successes" - ) - _logger.debug(cycle_success_configs) - if self.config.write_successes_to_file: - sorted_by_freq = numpy.array( - [ - pdme.subspace_simulation.sort_array_of_dipoles_by_frequency( - dipole_config - ) - for dipole_config in cycle_success_configs - ] - ) - dipole_count = numpy.array(cycle_success_configs).shape[1] - for n in range(dipole_count): - numpy.savetxt( - f"{self.config.tag}_{step_count}_{cycle_i}_dipole_{n}.csv", - sorted_by_freq[:, n], - delimiter=",", - ) - total_success += cycle_success_count - _logger.debug(f"At end of step {step_count} have {total_success} successes") - step_count += 1 - total_count += count_per_step - return DirectMonteCarloResult( - successes=total_success, - monte_carlo_count=total_count, - likelihood=total_success / total_count, + # core count etc. logic here + + results = [] + for model_name_pair in self.model_name_pairs: + step_count = 0 + total_success = 0 + total_count = 0 + + _logger.info(f"Working on model {model_name_pair[0]}") + # This is probably where multiprocessing logic should go + while (step_count < self.config.max_monte_carlo_cycles_steps) and ( + total_success < self.config.target_success + ): + _logger.debug(f"Executing step {step_count}") + for cycle_i, seed in enumerate( + seed_sequence.spawn(self.config.monte_carlo_cycles) + ): + # here's where we do our work + cycle_success_configs = self._single_run(model_name_pair, seed) + cycle_success_count = len(cycle_success_configs) + if cycle_success_count > 0: + _logger.debug( + f"For cycle {cycle_i} received {cycle_success_count} successes" + ) + # _logger.debug(cycle_success_configs) + if self.config.write_successes_to_file: + sorted_by_freq = numpy.array( + [ + pdme.subspace_simulation.sort_array_of_dipoles_by_frequency( + dipole_config + ) + for dipole_config in cycle_success_configs + ] + ) + dipole_count = numpy.array(cycle_success_configs).shape[1] + for n in range(dipole_count): + numpy.savetxt( + f"{self.config.tag}_{step_count}_{cycle_i}_dipole_{n}.csv", + sorted_by_freq[:, n], + delimiter=",", + ) + total_success += cycle_success_count + _logger.debug( + f"At end of step {step_count} have {total_success} successes" + ) + step_count += 1 + total_count += count_per_step + + results.append( + DirectMonteCarloResult( + successes=total_success, + monte_carlo_count=total_count, + likelihood=total_success / total_count, + model_name=model_name_pair[0], + ) + ) + return results + + def execute(self) -> Sequence[DirectMonteCarloResult]: + + # set starting execution timestamp + timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + + count_per_step = ( + self.config.monte_carlo_count_per_cycle * self.config.monte_carlo_cycles ) + seed_sequence = numpy.random.SeedSequence(self.config.monte_carlo_seed) + + # core count etc. logic here + core_count = multiprocessing.cpu_count() - 1 or 1 + if (self.config.cap_core_count >= 1) and ( + self.config.cap_core_count < core_count + ): + core_count = self.config.cap_core_count + _logger.info(f"Using {core_count} cores") + + results = [] + with multiprocessing.Pool(core_count) as pool: + + for model_name_pair in self.model_name_pairs: + _logger.info(f"Working on model {model_name_pair[0]}") + # This is probably where multiprocessing logic should go + + step_count = 0 + total_success = 0 + total_count = 0 + + while (step_count < self.config.max_monte_carlo_cycles_steps) and ( + total_success < self.config.target_success + ): + + step_count += 1 + + _logger.debug(f"Executing step {step_count}") + + seeds = seed_sequence.spawn(self.config.monte_carlo_cycles) + + pool_results = sum( + pool.imap_unordered( + self._wrapped_single_run, + [(model_name_pair, seed) for seed in seeds], + self.config.chunk_size, + ) + ) + _logger.debug(f"Pool results: {pool_results}") + + total_success += pool_results + total_count += count_per_step + _logger.debug( + f"At end of step {step_count} have {total_success} successes" + ) + + results.append( + DirectMonteCarloResult( + successes=total_success, + monte_carlo_count=total_count, + likelihood=total_success / total_count, + model_name=model_name_pair[0], + ) + ) + + if self.config.write_bayesrun_file: + + filename = ( + f"{timestamp}-{self.config.tag}.realdata.fast_filter.bayesrun.csv" + ) + _logger.info(f"Going to write to file [{filename}]") + # row: Dict[str, Union[int, float, str]] = {} + row = {} + + num_models = len(self.model_name_pairs) + success_weight = sum( + [ + (res.successes / res.monte_carlo_count) / num_models + for res in results + ] + ) + + for res in results: + row.update( + { + f"{res.model_name}_success": res.successes, + f"{res.model_name}_count": res.monte_carlo_count, + f"{res.model_name}_prob": ( + res.successes / res.monte_carlo_count + ) + / (num_models * success_weight), + } + ) + _logger.info(f"Writing row {row}") + fieldnames = list(row.keys()) + + with open(filename, "w", newline="") as outfile: + writer = csv.DictWriter(outfile, fieldnames=fieldnames, dialect="unix") + writer.writeheader() + writer.writerow(row) + + return results diff --git a/deepdog/direct_monte_carlo/dmc_filters.py b/deepdog/direct_monte_carlo/dmc_filters.py index 1981577..1193281 100644 --- a/deepdog/direct_monte_carlo/dmc_filters.py +++ b/deepdog/direct_monte_carlo/dmc_filters.py @@ -39,6 +39,135 @@ class SingleDotPotentialFilter(DirectMonteCarloFilter): return current_sample +class SingleDotSpinQubitFrequencyFilter(DirectMonteCarloFilter): + def __init__(self, measurements: Sequence[pdme.measurement.DotRangeMeasurement]): + self.measurements = measurements + self.dot_inputs = [(measure.r, measure.f) for measure in self.measurements] + + self.dot_inputs_array = pdme.measurement.input_types.dot_inputs_to_array( + self.dot_inputs + ) + ( + self.lows, + self.highs, + ) = pdme.measurement.input_types.dot_range_measurements_low_high_arrays( + self.measurements + ) + + # oh no not this again + def fast_s_spin_qubit_tarucha_apsd_dipoleses( + self, dot_inputs: numpy.ndarray, dipoleses: numpy.ndarray + ) -> numpy.ndarray: + """ + No error correction here baby. + """ + + # We're going to annotate the indices on this class. + # Let's define some indices: + # A -> index of dipoleses configurations + # j -> within a particular configuration, indexes dipole j + # measurement_index -> if we have 100 frequencies for example, indexes which one of them it is + # If we need to use numbers, let's use A -> 2, j -> 10, measurement_index -> 9 for consistency with + # my other notes + + # axes are [dipole_config_idx A, dipole_idx j, {px, py, pz}3] + ps = dipoleses[:, :, 0:3] + # axes are [dipole_config_idx A, dipole_idx j, {sx, sy, sz}3] + ss = dipoleses[:, :, 3:6] + # axes are [dipole_config_idx A, dipole_idx j, w], last axis is just 1 + ws = dipoleses[:, :, 6] + + # dot_index is either 0 or 1 for dot1 or dot2 + # hopefully this adhoc grammar is making sense, with the explicit labelling of the values of the last axis in cartesian space + # axes are [measurement_idx, {dot_index}, {rx, ry, rz}] where the inner {dot_index} is gone + # [measurement_idx, cartesian3] + rs = dot_inputs[:, 0:3] + # axes are [measurement_idx] + fs = dot_inputs[:, 3] + + # first operation! + # r1s has shape [measurement_idx, rxs] + # None inserts an extra axis so the r1s[:, None] has shape + # [measurement_idx, 1]([rxs]) with the last rxs hidden + # + # ss has shape [ A, j, {sx, sy, sz}3], so second term has shape [A, 1, j]([sxs]) + # these broadcast from right to left + # [ measurement_idx, 1, rxs] + # [A, 1, j, sxs] + # resulting in [A, measurement_idx, j, cart3] sxs rxs are both cart3 + diffses = rs[:, None] - ss[:, None, :] + + # norms takes out axis 3, the last one, giving [A, measurement_idx, j] + norms = numpy.linalg.norm(diffses, axis=3) + + # _logger.info(f"norms1: {norms1}") + # _logger.info(f"norms1 shape: {norms1.shape}") + # + # diffses1 (A, measurement_idx, j, xs) + # ps: (A, j, px) + # result is (A, measurement_idx, j) + # intermediate_dot_prod = numpy.einsum("abcd,acd->abc", diffses1, ps) + # _logger.info(f"dot product shape: {intermediate_dot_prod.shape}") + + # transpose makes it (j, measurement_idx, A) + # transp_intermediate_dot_prod = numpy.transpose(numpy.einsum("abcd,acd->abc", diffses1, ps) / (norms1**3)) + + # transpose of diffses has shape (xs, j, measurement_idx, A) + # numpy.transpose(diffses1) + # _logger.info(f"dot product shape: {transp_intermediate_dot_prod.shape}") + + # inner transpose is (j, measurement_idx, A) * (xs, j, measurement_idx, A) + # next transpose puts it back to (A, measurement_idx, j, xs) + # p_dot_r_times_r_term = 3 * numpy.transpose(numpy.transpose(numpy.einsum("abcd,acd->abc", diffses1, ps) / (norms1**3)) * numpy.transpose(diffses1)) + # _logger.info(f"p_dot_r_times_r_term: {p_dot_r_times_r_term.shape}") + + # only x axis puts us at (A, measurement_idx, j) + # p_dot_r_times_r_term_x_only = p_dot_r_times_r_term[:, :, :, 0] + # _logger.info(f"p_dot_r_times_r_term_x_only.shape: {p_dot_r_times_r_term_x_only.shape}") + + # now to complete the numerator we subtract the ps, which are (A, j, px): + # slicing off the end gives us (A, j), so we newaxis to get (A, 1, j) + # _logger.info(ps[:, numpy.newaxis, :, 0].shape) + alphses = ( + ( + 3 + * numpy.transpose( + numpy.transpose( + numpy.einsum("abcd,acd->abc", diffses, ps) / (norms**2) + ) + * numpy.transpose(diffses) + )[:, :, :, 0] + ) + - ps[:, numpy.newaxis, :, 0] + ) / (norms**3) + + bses = ( + 2 + * numpy.pi + * ws[:, None, :] + / ((2 * numpy.pi * fs[:, None]) ** 2 + 4 * ws[:, None, :] ** 2) + ) + + return numpy.einsum("...j->...", alphses * alphses * bses) + + def filter_samples(self, samples: ndarray) -> ndarray: + current_sample = samples + for di, low, high in zip(self.dot_inputs_array, self.lows, self.highs): + + if len(current_sample) < 1: + break + vals = self.fast_s_spin_qubit_tarucha_apsd_dipoleses( + numpy.array([di]), current_sample + ) + # _logger.info(vals) + + current_sample = current_sample[ + numpy.all((vals > low) & (vals < high), axis=1) + ] + # _logger.info(f"leaving with {len(current_sample)}") + return current_sample + + class DoubleDotSpinQubitFrequencyFilter(DirectMonteCarloFilter): def __init__( self, @@ -59,59 +188,6 @@ class DoubleDotSpinQubitFrequencyFilter(DirectMonteCarloFilter): self.pair_phase_measurements ) - def fast_s_spin_qubit_tarucha_nonlocal_dipoleses( - self, dot_pair_inputs: numpy.ndarray, dipoleses: numpy.ndarray - ) -> numpy.ndarray: - """ - No error correction here baby. - """ - ps = dipoleses[:, :, 0:3] - ss = dipoleses[:, :, 3:6] - ws = dipoleses[:, :, 6] - - r1s = dot_pair_inputs[:, 0, 0:3] - r2s = dot_pair_inputs[:, 1, 0:3] - f1s = dot_pair_inputs[:, 0, 3] - # Don't actually need this - # f2s = dot_pair_inputs[:, 1, 3] - - diffses1 = r1s[:, None] - ss[:, None, :] - diffses2 = r2s[:, None] - ss[:, None, :] - - norms1 = numpy.linalg.norm(diffses1, axis=3) - norms2 = numpy.linalg.norm(diffses2, axis=3) - - alphses1 = ( - ( - 3 - * numpy.transpose( - numpy.transpose( - numpy.einsum("abcd,acd->abc", diffses1, ps) / (norms1**2) - ) - * numpy.transpose(diffses1) - )[:, :, :, 0] - ) - - ps[:, :, 0, numpy.newaxis] - ) / (norms1**3) - alphses2 = ( - ( - 3 - * numpy.transpose( - numpy.transpose( - numpy.einsum("abcd,acd->abc", diffses2, ps) / (norms2**2) - ) - * numpy.transpose(diffses2) - )[:, :, :, 0] - ) - - ps[:, :, 0, numpy.newaxis] - ) / (norms2**3) - - bses = (1 / numpy.pi) * ( - ws[:, None, :] / (f1s[:, None] ** 2 + ws[:, None, :] ** 2) - ) - - return numpy.einsum("...j->...", alphses1 * alphses2 * bses) - def filter_samples(self, samples: ndarray) -> ndarray: current_sample = samples @@ -121,16 +197,8 @@ class DoubleDotSpinQubitFrequencyFilter(DirectMonteCarloFilter): if len(current_sample) < 1: break - ### - # This should be abstracted out, but we're going to dump it here for time pressure's sake - ### - # vals = pdme.util.fast_nonlocal_spectrum.signarg( - # pdme.util.fast_nonlocal_spectrum.fast_s_nonlocal_dipoleses( - # numpy.array([pi]), current_sample - # ) - # vals = pdme.util.fast_nonlocal_spectrum.signarg( - self.fast_s_spin_qubit_tarucha_nonlocal_dipoleses( + pdme.util.fast_nonlocal_spectrum.fast_s_spin_qubit_tarucha_nonlocal_dipoleses( numpy.array([pi]), current_sample ) ) diff --git a/deepdog/real_spectrum_run.py b/deepdog/real_spectrum_run.py index a54df6f..9e2e2cf 100644 --- a/deepdog/real_spectrum_run.py +++ b/deepdog/real_spectrum_run.py @@ -112,59 +112,6 @@ def get_a_result_fast_filter_tarucha_spin_qubit_pair_phase_only(input) -> int: seed, ) = input - def fast_s_spin_qubit_tarucha_nonlocal_dipoleses( - dot_pair_inputs: numpy.ndarray, dipoleses: numpy.ndarray - ) -> numpy.ndarray: - """ - No error correction here baby. - """ - ps = dipoleses[:, :, 0:3] - ss = dipoleses[:, :, 3:6] - ws = dipoleses[:, :, 6] - - r1s = dot_pair_inputs[:, 0, 0:3] - r2s = dot_pair_inputs[:, 1, 0:3] - f1s = dot_pair_inputs[:, 0, 3] - # don't actually need, because we're assuming they're the same frequencies across the pair - # f2s = dot_pair_inputs[:, 1, 3] - - diffses1 = r1s[:, None] - ss[:, None, :] - diffses2 = r2s[:, None] - ss[:, None, :] - - norms1 = numpy.linalg.norm(diffses1, axis=3) - norms2 = numpy.linalg.norm(diffses2, axis=3) - - alphses1 = ( - ( - 3 - * numpy.transpose( - numpy.transpose( - numpy.einsum("abcd,acd->abc", diffses1, ps) / (norms1**2) - ) - * numpy.transpose(diffses1) - )[:, :, :, 0] - ) - - ps[:, numpy.newaxis, :, 0] - ) / (norms1**3) - alphses2 = ( - ( - 3 - * numpy.transpose( - numpy.transpose( - numpy.einsum("abcd,acd->abc", diffses2, ps) / (norms2**2) - ) - * numpy.transpose(diffses2) - )[:, :, :, 0] - ) - - ps[:, numpy.newaxis, :, 0] - ) / (norms2**3) - - bses = (1 / numpy.pi) * ( - ws[:, None, :] / (f1s[:, None] ** 2 + ws[:, None, :] ** 2) - ) - - return numpy.einsum("...j->...", alphses1 * alphses2 * bses) - rng = numpy.random.default_rng(seed) # TODO: A long term refactor is to pull the frequency stuff out from here. The None stands for max_frequency, which is unneeded in the actually useful models. sample_dipoles = model.get_monte_carlo_dipole_inputs( @@ -186,7 +133,7 @@ def get_a_result_fast_filter_tarucha_spin_qubit_pair_phase_only(input) -> int: # ) # vals = pdme.util.fast_nonlocal_spectrum.signarg( - fast_s_spin_qubit_tarucha_nonlocal_dipoleses( + pdme.util.fast_nonlocal_spectrum.fast_s_spin_qubit_tarucha_nonlocal_dipoleses( numpy.array([pi]), current_sample ) ) diff --git a/poetry.lock b/poetry.lock index d3e6023..193adce 100644 --- a/poetry.lock +++ b/poetry.lock @@ -389,8 +389,11 @@ name = "docutils" version = "0.20.1" description = "Docutils -- Python Documentation Utilities" optional = false -python-versions = "*" -files = [] +python-versions = ">=3.7" +files = [ + {file = "docutils-0.20.1-py3-none-any.whl", hash = "sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6"}, + {file = "docutils-0.20.1.tar.gz", hash = "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b"}, +] [[package]] name = "dotty-dict"