feat: Adds more powerful direct MC runs to substitute for the old real spectrum run
All checks were successful (gitea-physics/deepdog/pipeline/head: This commit looks good)

Deepak Mallubhotla 2024-05-01 15:22:59 -05:00
parent cb166a399d
commit f2b1a1dd3b
Signed by: deepak
GPG Key ID: BEBAEBF28083E022
4 changed files with 330 additions and 167 deletions

View File

@@ -1,13 +1,16 @@
import csv
import pdme.model
import pdme.measurement
import pdme.measurement.input_types
import pdme.subspace_simulation
from typing import Tuple, Dict, NewType, Any
import datetime
from typing import Tuple, Dict, NewType, Any, Sequence
from dataclasses import dataclass
import logging
import numpy
import numpy.random
import pdme.util.fast_v_calc
import multiprocessing
_logger = logging.getLogger(__name__)
@@ -17,6 +20,7 @@ class DirectMonteCarloResult:
successes: int
monte_carlo_count: int
likelihood: float
model_name: str
@dataclass
@@ -28,6 +32,10 @@ class DirectMonteCarloConfig:
monte_carlo_seed: int = 1234
write_successes_to_file: bool = False
tag: str = ""
cap_core_count: int = 0 # 0 means cap at num cores - 1
chunk_size: int = 50
write_bayesrun_file = True
# chunk size of some kind
# Aliasing dict as a generic data container
@@ -51,8 +59,8 @@ class DirectMonteCarloRun:
Parameters
----------
model_name_pair : Sequence[Tuple(str, pdme.model.DipoleModel)]
The model to evaluate, with name.
model_name_pairs : Sequence[Tuple(str, pdme.model.DipoleModel)]
The models to evaluate, with names
measurements: Sequence[pdme.measurement.DotRangeMeasurement]
The measurements as dot ranges to use as the bounds for the Monte Carlo calculation.
@@ -78,11 +86,11 @@ class DirectMonteCarloRun:
def __init__(
self,
model_name_pair: Tuple[str, pdme.model.DipoleModel],
model_name_pairs: Sequence[Tuple[str, pdme.model.DipoleModel]],
filter: DirectMonteCarloFilter,
config: DirectMonteCarloConfig,
):
self.model_name, self.model = model_name_pair
self.model_name_pairs = model_name_pairs
# self.measurements = measurements
# self.dot_inputs = [(measure.r, measure.f) for measure in self.measurements]
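The constructor now takes a sequence of (name, model) pairs instead of a single pair, so one run can score several candidate models against the same measurements. A minimal sketch of the new call shape follows; the import path, the model objects, and the filter are stand-ins assumed for illustration, not defined in this commit:

```python
# Sketch only: the import path and all placeholder objects are assumptions.
from deepdog.direct_monte_carlo import DirectMonteCarloConfig, DirectMonteCarloRun

model_a = ...  # any pdme.model.DipoleModel instance
model_b = ...  # a second candidate model to compare against
my_filter = ...  # any DirectMonteCarloFilter built from the measurements

run = DirectMonteCarloRun(
    model_name_pairs=[("model_a", model_a), ("model_b", model_b)],
    filter=my_filter,
    config=DirectMonteCarloConfig(tag="example", cap_core_count=4),
)
results = run.execute()  # one DirectMonteCarloResult per model
```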
@@ -100,10 +108,16 @@ class DirectMonteCarloRun:
# self.measurements
# )
def _single_run(self, seed) -> numpy.ndarray:
def _single_run(
self, model_name_pair: Tuple[str, pdme.model.DipoleModel], seed
) -> numpy.ndarray:
rng = numpy.random.default_rng(seed)
sample_dipoles = self.model.get_monte_carlo_dipole_inputs(
_, model = model_name_pair
# don't log here it's madness
# _logger.info(f"Executing for model {model_name}")
sample_dipoles = model.get_monte_carlo_dipole_inputs(
self.config.monte_carlo_count_per_cycle, -1, rng
)
@@ -123,15 +137,38 @@ class DirectMonteCarloRun:
# ]
# return current_sample
def execute(self) -> DirectMonteCarloResult:
step_count = 0
total_success = 0
total_count = 0
def _wrapped_single_run(self, args: Tuple):
"""
single run wrapped up for multiprocessing call.
takes in a tuple of arguments corresponding to
(model_name_pair, seed)
"""
# here's where we do our work
model_name_pair, seed = args
cycle_success_configs = self._single_run(model_name_pair, seed)
cycle_success_count = len(cycle_success_configs)
return cycle_success_count
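The tuple packing in `_wrapped_single_run` is there because `Pool.imap_unordered` hands each worker exactly one task object, so multiple arguments have to travel as a single tuple. A self-contained illustration of that pattern, using only the standard library and unrelated to the deepdog classes:

```python
import multiprocessing


def work(args):
    # imap_unordered passes one object per task, so several arguments
    # are packed into a tuple and unpacked inside the worker.
    label, seed = args
    return label, seed * 2


if __name__ == "__main__":
    tasks = [("a", 1), ("b", 2), ("c", 3)]
    with multiprocessing.Pool(2) as pool:
        # the third argument is the chunksize, mirroring the chunk_size config option added above
        for label, value in pool.imap_unordered(work, tasks, 2):
            print(label, value)
```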
def execute_no_multiprocessing(self) -> Sequence[DirectMonteCarloResult]:
count_per_step = (
self.config.monte_carlo_count_per_cycle * self.config.monte_carlo_cycles
)
seed_sequence = numpy.random.SeedSequence(self.config.monte_carlo_seed)
# core count etc. logic here
results = []
for model_name_pair in self.model_name_pairs:
step_count = 0
total_success = 0
total_count = 0
_logger.info(f"Working on model {model_name_pair[0]}")
# This is probably where multiprocessing logic should go
while (step_count < self.config.max_monte_carlo_cycles_steps) and (
total_success < self.config.target_success
):
@@ -139,13 +176,14 @@ class DirectMonteCarloRun:
for cycle_i, seed in enumerate(
seed_sequence.spawn(self.config.monte_carlo_cycles)
):
cycle_success_configs = self._single_run(seed)
# here's where we do our work
cycle_success_configs = self._single_run(model_name_pair, seed)
cycle_success_count = len(cycle_success_configs)
if cycle_success_count > 0:
_logger.debug(
f"For cycle {cycle_i} received {cycle_success_count} successes"
)
_logger.debug(cycle_success_configs)
# _logger.debug(cycle_success_configs)
if self.config.write_successes_to_file:
sorted_by_freq = numpy.array(
[
@@ -163,12 +201,119 @@ class DirectMonteCarloRun:
delimiter=",",
)
total_success += cycle_success_count
_logger.debug(f"At end of step {step_count} have {total_success} successes")
_logger.debug(
f"At end of step {step_count} have {total_success} successes"
)
step_count += 1
total_count += count_per_step
return DirectMonteCarloResult(
results.append(
DirectMonteCarloResult(
successes=total_success,
monte_carlo_count=total_count,
likelihood=total_success / total_count,
model_name=model_name_pair[0],
)
)
return results
def execute(self) -> Sequence[DirectMonteCarloResult]:
# set starting execution timestamp
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
count_per_step = (
self.config.monte_carlo_count_per_cycle * self.config.monte_carlo_cycles
)
seed_sequence = numpy.random.SeedSequence(self.config.monte_carlo_seed)
# core count etc. logic here
core_count = multiprocessing.cpu_count() - 1 or 1
if (self.config.cap_core_count >= 1) and (
self.config.cap_core_count < core_count
):
core_count = self.config.cap_core_count
_logger.info(f"Using {core_count} cores")
results = []
with multiprocessing.Pool(core_count) as pool:
for model_name_pair in self.model_name_pairs:
_logger.info(f"Working on model {model_name_pair[0]}")
# This is probably where multiprocessing logic should go
step_count = 0
total_success = 0
total_count = 0
while (step_count < self.config.max_monte_carlo_cycles_steps) and (
total_success < self.config.target_success
):
step_count += 1
_logger.debug(f"Executing step {step_count}")
seeds = seed_sequence.spawn(self.config.monte_carlo_cycles)
pool_results = sum(
pool.imap_unordered(
self._wrapped_single_run,
[(model_name_pair, seed) for seed in seeds],
self.config.chunk_size,
)
)
_logger.debug(f"Pool results: {pool_results}")
total_success += pool_results
total_count += count_per_step
_logger.debug(
f"At end of step {step_count} have {total_success} successes"
)
results.append(
DirectMonteCarloResult(
successes=total_success,
monte_carlo_count=total_count,
likelihood=total_success / total_count,
model_name=model_name_pair[0],
)
)
if self.config.write_bayesrun_file:
filename = (
f"{timestamp}-{self.config.tag}.realdata.fast_filter.bayesrun.csv"
)
_logger.info(f"Going to write to file [{filename}]")
# row: Dict[str, Union[int, float, str]] = {}
row = {}
num_models = len(self.model_name_pairs)
success_weight = sum(
[
(res.successes / res.monte_carlo_count) / num_models
for res in results
]
)
for res in results:
row.update(
{
f"{res.model_name}_success": res.successes,
f"{res.model_name}_count": res.monte_carlo_count,
f"{res.model_name}_prob": (
res.successes / res.monte_carlo_count
)
/ (num_models * success_weight),
}
)
_logger.info(f"Writing row {row}")
fieldnames = list(row.keys())
with open(filename, "w", newline="") as outfile:
writer = csv.DictWriter(outfile, fieldnames=fieldnames, dialect="unix")
writer.writeheader()
writer.writerow(row)
return results
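The `_prob` column written to the bayesrun file normalises each model's hit rate by the average across models: with `success_weight = (1/N) * sum_j(successes_j / count_j)`, the value `(successes_i / count_i) / (N * success_weight)` reduces to `likelihood_i / sum_j(likelihood_j)`, so the per-model probabilities sum to one. A small worked example with made-up counts:

```python
# Worked example of the bayesrun-row normalisation, with invented counts.
results = [("model_a", 30, 10_000), ("model_b", 10, 10_000)]  # (name, successes, count)

num_models = len(results)
success_weight = sum((s / c) / num_models for _, s, c in results)  # 0.002
probs = {name: (s / c) / (num_models * success_weight) for name, s, c in results}
print(probs)  # {'model_a': 0.75, 'model_b': 0.25} -- sums to 1
```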

View File

@@ -39,6 +39,135 @@ class SingleDotPotentialFilter(DirectMonteCarloFilter):
return current_sample
class SingleDotSpinQubitFrequencyFilter(DirectMonteCarloFilter):
def __init__(self, measurements: Sequence[pdme.measurement.DotRangeMeasurement]):
self.measurements = measurements
self.dot_inputs = [(measure.r, measure.f) for measure in self.measurements]
self.dot_inputs_array = pdme.measurement.input_types.dot_inputs_to_array(
self.dot_inputs
)
(
self.lows,
self.highs,
) = pdme.measurement.input_types.dot_range_measurements_low_high_arrays(
self.measurements
)
# oh no not this again
def fast_s_spin_qubit_tarucha_apsd_dipoleses(
self, dot_inputs: numpy.ndarray, dipoleses: numpy.ndarray
) -> numpy.ndarray:
"""
No error correction here baby.
"""
# We're going to annotate the indices on this class.
# Let's define some indices:
# A -> index of dipoleses configurations
# j -> within a particular configuration, indexes dipole j
# measurement_index -> if we have 100 frequencies for example, indexes which one of them it is
# If we need to use numbers, let's use A -> 2, j -> 10, measurement_index -> 9 for consistency with
# my other notes
# axes are [dipole_config_idx A, dipole_idx j, {px, py, pz}3]
ps = dipoleses[:, :, 0:3]
# axes are [dipole_config_idx A, dipole_idx j, {sx, sy, sz}3]
ss = dipoleses[:, :, 3:6]
# axes are [dipole_config_idx A, dipole_idx j, w], last axis is just 1
ws = dipoleses[:, :, 6]
# dot_index is either 0 or 1 for dot1 or dot2
# hopefully this adhoc grammar is making sense, with the explicit labelling of the values of the last axis in cartesian space
# axes are [measurement_idx, {dot_index}, {rx, ry, rz}] where the inner {dot_index} is gone
# [measurement_idx, cartesian3]
rs = dot_inputs[:, 0:3]
# axes are [measurement_idx]
fs = dot_inputs[:, 3]
# first operation!
# r1s has shape [measurement_idx, rxs]
# None inserts an extra axis so the r1s[:, None] has shape
# [measurement_idx, 1]([rxs]) with the last rxs hidden
#
# ss has shape [ A, j, {sx, sy, sz}3], so second term has shape [A, 1, j]([sxs])
# these broadcast from right to left
# [ measurement_idx, 1, rxs]
# [A, 1, j, sxs]
# resulting in [A, measurement_idx, j, cart3] sxs rxs are both cart3
diffses = rs[:, None] - ss[:, None, :]
# norms takes out axis 3, the last one, giving [A, measurement_idx, j]
norms = numpy.linalg.norm(diffses, axis=3)
# _logger.info(f"norms1: {norms1}")
# _logger.info(f"norms1 shape: {norms1.shape}")
#
# diffses1 (A, measurement_idx, j, xs)
# ps: (A, j, px)
# result is (A, measurement_idx, j)
# intermediate_dot_prod = numpy.einsum("abcd,acd->abc", diffses1, ps)
# _logger.info(f"dot product shape: {intermediate_dot_prod.shape}")
# transpose makes it (j, measurement_idx, A)
# transp_intermediate_dot_prod = numpy.transpose(numpy.einsum("abcd,acd->abc", diffses1, ps) / (norms1**3))
# transpose of diffses has shape (xs, j, measurement_idx, A)
# numpy.transpose(diffses1)
# _logger.info(f"dot product shape: {transp_intermediate_dot_prod.shape}")
# inner transpose is (j, measurement_idx, A) * (xs, j, measurement_idx, A)
# next transpose puts it back to (A, measurement_idx, j, xs)
# p_dot_r_times_r_term = 3 * numpy.transpose(numpy.transpose(numpy.einsum("abcd,acd->abc", diffses1, ps) / (norms1**3)) * numpy.transpose(diffses1))
# _logger.info(f"p_dot_r_times_r_term: {p_dot_r_times_r_term.shape}")
# only x axis puts us at (A, measurement_idx, j)
# p_dot_r_times_r_term_x_only = p_dot_r_times_r_term[:, :, :, 0]
# _logger.info(f"p_dot_r_times_r_term_x_only.shape: {p_dot_r_times_r_term_x_only.shape}")
# now to complete the numerator we subtract the ps, which are (A, j, px):
# slicing off the end gives us (A, j), so we newaxis to get (A, 1, j)
# _logger.info(ps[:, numpy.newaxis, :, 0].shape)
alphses = (
(
3
* numpy.transpose(
numpy.transpose(
numpy.einsum("abcd,acd->abc", diffses, ps) / (norms**2)
)
* numpy.transpose(diffses)
)[:, :, :, 0]
)
- ps[:, numpy.newaxis, :, 0]
) / (norms**3)
bses = (
2
* numpy.pi
* ws[:, None, :]
/ ((2 * numpy.pi * fs[:, None]) ** 2 + 4 * ws[:, None, :] ** 2)
)
return numpy.einsum("...j->...", alphses * alphses * bses)
def filter_samples(self, samples: ndarray) -> ndarray:
current_sample = samples
for di, low, high in zip(self.dot_inputs_array, self.lows, self.highs):
if len(current_sample) < 1:
break
vals = self.fast_s_spin_qubit_tarucha_apsd_dipoleses(
numpy.array([di]), current_sample
)
# _logger.info(vals)
current_sample = current_sample[
numpy.all((vals > low) & (vals < high), axis=1)
]
# _logger.info(f"leaving with {len(current_sample)}")
return current_sample
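The index conventions spelled out in the comments of `fast_s_spin_qubit_tarucha_apsd_dipoleses` (A configurations, j dipoles per configuration, one row per measurement) can be checked with a quick shape experiment. This sketch only exercises the broadcasting, with random values and hypothetical sizes; it is not part of the commit:

```python
import numpy

# Hypothetical sizes: A=2 configurations, j=10 dipoles, 9 measurements.
rng = numpy.random.default_rng(0)
dipoleses = rng.normal(size=(2, 10, 7))  # [A, j, {px,py,pz, sx,sy,sz, w}]
dot_inputs = rng.normal(size=(9, 4))     # [measurement_idx, {rx,ry,rz, f}]

ps = dipoleses[:, :, 0:3]
ss = dipoleses[:, :, 3:6]
rs = dot_inputs[:, 0:3]

diffses = rs[:, None] - ss[:, None, :]             # broadcasts to [A, measurement_idx, j, 3]
norms = numpy.linalg.norm(diffses, axis=3)         # [A, measurement_idx, j]
dots = numpy.einsum("abcd,acd->abc", diffses, ps)  # [A, measurement_idx, j]
print(diffses.shape, norms.shape, dots.shape)      # (2, 9, 10, 3) (2, 9, 10) (2, 9, 10)
```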
class DoubleDotSpinQubitFrequencyFilter(DirectMonteCarloFilter):
def __init__(
self,
@@ -59,59 +188,6 @@ class DoubleDotSpinQubitFrequencyFilter(DirectMonteCarloFilter):
self.pair_phase_measurements
)
def fast_s_spin_qubit_tarucha_nonlocal_dipoleses(
self, dot_pair_inputs: numpy.ndarray, dipoleses: numpy.ndarray
) -> numpy.ndarray:
"""
No error correction here baby.
"""
ps = dipoleses[:, :, 0:3]
ss = dipoleses[:, :, 3:6]
ws = dipoleses[:, :, 6]
r1s = dot_pair_inputs[:, 0, 0:3]
r2s = dot_pair_inputs[:, 1, 0:3]
f1s = dot_pair_inputs[:, 0, 3]
# Don't actually need this
# f2s = dot_pair_inputs[:, 1, 3]
diffses1 = r1s[:, None] - ss[:, None, :]
diffses2 = r2s[:, None] - ss[:, None, :]
norms1 = numpy.linalg.norm(diffses1, axis=3)
norms2 = numpy.linalg.norm(diffses2, axis=3)
alphses1 = (
(
3
* numpy.transpose(
numpy.transpose(
numpy.einsum("abcd,acd->abc", diffses1, ps) / (norms1**2)
)
* numpy.transpose(diffses1)
)[:, :, :, 0]
)
- ps[:, :, 0, numpy.newaxis]
) / (norms1**3)
alphses2 = (
(
3
* numpy.transpose(
numpy.transpose(
numpy.einsum("abcd,acd->abc", diffses2, ps) / (norms2**2)
)
* numpy.transpose(diffses2)
)[:, :, :, 0]
)
- ps[:, :, 0, numpy.newaxis]
) / (norms2**3)
bses = (1 / numpy.pi) * (
ws[:, None, :] / (f1s[:, None] ** 2 + ws[:, None, :] ** 2)
)
return numpy.einsum("...j->...", alphses1 * alphses2 * bses)
def filter_samples(self, samples: ndarray) -> ndarray:
current_sample = samples
@@ -121,16 +197,8 @@ class DoubleDotSpinQubitFrequencyFilter(DirectMonteCarloFilter):
if len(current_sample) < 1:
break
###
# This should be abstracted out, but we're going to dump it here for time pressure's sake
###
# vals = pdme.util.fast_nonlocal_spectrum.signarg(
# pdme.util.fast_nonlocal_spectrum.fast_s_nonlocal_dipoleses(
# numpy.array([pi]), current_sample
# )
#
vals = pdme.util.fast_nonlocal_spectrum.signarg(
self.fast_s_spin_qubit_tarucha_nonlocal_dipoleses(
pdme.util.fast_nonlocal_spectrum.fast_s_spin_qubit_tarucha_nonlocal_dipoleses(
numpy.array([pi]), current_sample
)
)

View File

@@ -112,59 +112,6 @@ def get_a_result_fast_filter_tarucha_spin_qubit_pair_phase_only(input) -> int:
seed,
) = input
def fast_s_spin_qubit_tarucha_nonlocal_dipoleses(
dot_pair_inputs: numpy.ndarray, dipoleses: numpy.ndarray
) -> numpy.ndarray:
"""
No error correction here baby.
"""
ps = dipoleses[:, :, 0:3]
ss = dipoleses[:, :, 3:6]
ws = dipoleses[:, :, 6]
r1s = dot_pair_inputs[:, 0, 0:3]
r2s = dot_pair_inputs[:, 1, 0:3]
f1s = dot_pair_inputs[:, 0, 3]
# don't actually need, because we're assuming they're the same frequencies across the pair
# f2s = dot_pair_inputs[:, 1, 3]
diffses1 = r1s[:, None] - ss[:, None, :]
diffses2 = r2s[:, None] - ss[:, None, :]
norms1 = numpy.linalg.norm(diffses1, axis=3)
norms2 = numpy.linalg.norm(diffses2, axis=3)
alphses1 = (
(
3
* numpy.transpose(
numpy.transpose(
numpy.einsum("abcd,acd->abc", diffses1, ps) / (norms1**2)
)
* numpy.transpose(diffses1)
)[:, :, :, 0]
)
- ps[:, numpy.newaxis, :, 0]
) / (norms1**3)
alphses2 = (
(
3
* numpy.transpose(
numpy.transpose(
numpy.einsum("abcd,acd->abc", diffses2, ps) / (norms2**2)
)
* numpy.transpose(diffses2)
)[:, :, :, 0]
)
- ps[:, numpy.newaxis, :, 0]
) / (norms2**3)
bses = (1 / numpy.pi) * (
ws[:, None, :] / (f1s[:, None] ** 2 + ws[:, None, :] ** 2)
)
return numpy.einsum("...j->...", alphses1 * alphses2 * bses)
rng = numpy.random.default_rng(seed)
# TODO: A long term refactor is to pull the frequency stuff out from here. The None stands for max_frequency, which is unneeded in the actually useful models.
sample_dipoles = model.get_monte_carlo_dipole_inputs(
@@ -186,7 +133,7 @@ def get_a_result_fast_filter_tarucha_spin_qubit_pair_phase_only(input) -> int:
# )
#
vals = pdme.util.fast_nonlocal_spectrum.signarg(
fast_s_spin_qubit_tarucha_nonlocal_dipoleses(
pdme.util.fast_nonlocal_spectrum.fast_s_spin_qubit_tarucha_nonlocal_dipoleses(
numpy.array([pi]), current_sample
)
)

poetry.lock (generated, 7 lines changed)
View File

@@ -389,8 +389,11 @@ name = "docutils"
version = "0.20.1"
description = "Docutils -- Python Documentation Utilities"
optional = false
python-versions = "*"
files = []
python-versions = ">=3.7"
files = [
{file = "docutils-0.20.1-py3-none-any.whl", hash = "sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6"},
{file = "docutils-0.20.1.tar.gz", hash = "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b"},
]
[[package]]
name = "dotty-dict"