Compare commits

..

15 Commits
1.0.0 ... 1.2.1

Author SHA1 Message Date
d258cfbec7 chore(release): 1.2.1
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
gitea-physics/deepdog/pipeline/tag This commit looks good
2024-05-11 20:51:05 -05:00
b3bf4cde97 perf: precompile the magic regexes for probs parsing 2024-05-11 20:49:45 -05:00
60f29b0b2f perf: avoid recalculating product dict in indexifier to improve performance for probs 2024-05-11 20:49:26 -05:00
093a3fb5c4 chore(release): 1.2.0
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
gitea-physics/deepdog/pipeline/tag This commit looks good
2024-05-08 22:24:28 -05:00
dc1d2d45a3 feat: adds additional matching regexes
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
2024-05-08 22:23:57 -05:00
f0e2fa3da9 feat: adds magnitude enabled parsing option 2024-05-03 10:44:06 -05:00
2581e722e6 chore(release): 1.1.0
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
gitea-physics/deepdog/pipeline/tag This commit looks good
2024-05-02 23:13:21 -05:00
62bd63bf9b refactor: removes redundant calculation and uses pdme 2024-05-02 23:12:21 -05:00
df4d0b5d15 deps: upgrades pdme dep 2024-05-02 22:40:06 -05:00
5361dada8b feat: removes legacy bayes run, technically breaking but just don't use them 2024-05-02 22:04:49 -05:00
29029c137a deps: upgrades pdme 2024-05-02 18:17:33 -05:00
fb018abeae feat: allows disabling timestamps in directmc bayesrun files
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
2024-05-01 21:40:53 -05:00
d28c190816 chore(release): 1.0.1
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
gitea-physics/deepdog/pipeline/tag This commit looks good
2024-05-01 21:08:00 -05:00
0262de060f nix: adds node for npx in dev shell 2024-05-01 21:07:48 -05:00
e25db1e0f6 fix: fixes issue of zero division error with no successes for anything
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
2024-05-01 20:37:08 -05:00
16 changed files with 244 additions and 1204 deletions

View File

@@ -2,6 +2,31 @@
All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines. All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
### [1.2.1](https://gitea.deepak.science:2222/physics/deepdog/compare/1.2.0...1.2.1) (2024-05-12)
## [1.2.0](https://gitea.deepak.science:2222/physics/deepdog/compare/1.1.0...1.2.0) (2024-05-09)
### Features
* adds additional matching regexes ([dc1d2d4](https://gitea.deepak.science:2222/physics/deepdog/commit/dc1d2d45a3e631c5efccce80f8a24fa87c6089e0))
* adds magnitude enabled parsing option ([f0e2fa3](https://gitea.deepak.science:2222/physics/deepdog/commit/f0e2fa3da9f5a5136908d691137a904fda4e3a9a))
## [1.1.0](https://gitea.deepak.science:2222/physics/deepdog/compare/1.0.1...1.1.0) (2024-05-03)
### Features
* allows disabling timestamps in directmc bayesrun files ([fb018ab](https://gitea.deepak.science:2222/physics/deepdog/commit/fb018abeae2adf4438a030140a6c905f11bb6bc1))
* removes legacy bayes run, technically breaking but just don't use them ([5361dad](https://gitea.deepak.science:2222/physics/deepdog/commit/5361dada8be4950b5157862f6a92254b543889c3))
### [1.0.1](https://gitea.deepak.science:2222/physics/deepdog/compare/1.0.0...1.0.1) (2024-05-02)
### Bug Fixes
* fixes issue of zero division error with no successes for anything ([e25db1e](https://gitea.deepak.science:2222/physics/deepdog/commit/e25db1e0f677e8d9a657fa1631305cc8f05ff9ff))
## [1.0.0](https://gitea.deepak.science:2222/physics/deepdog/compare/0.8.1...1.0.0) (2024-05-01) ## [1.0.0](https://gitea.deepak.science:2222/physics/deepdog/compare/0.8.1...1.0.0) (2024-05-01)

View File

@@ -1,10 +1,7 @@
import logging import logging
from deepdog.meta import __version__ from deepdog.meta import __version__
from deepdog.bayes_run import BayesRun
from deepdog.bayes_run_simulpairs import BayesRunSimulPairs
from deepdog.real_spectrum_run import RealSpectrumRun from deepdog.real_spectrum_run import RealSpectrumRun
from deepdog.temp_aware_real_spectrum_run import TempAwareRealSpectrumRun from deepdog.temp_aware_real_spectrum_run import TempAwareRealSpectrumRun
from deepdog.bayes_run_with_ss import BayesRunWithSubspaceSimulation
def get_version(): def get_version():
@@ -13,11 +10,8 @@ def get_version():
__all__ = [ __all__ = [
"get_version", "get_version",
"BayesRun",
"BayesRunSimulPairs",
"RealSpectrumRun", "RealSpectrumRun",
"TempAwareRealSpectrumRun", "TempAwareRealSpectrumRun",
"BayesRunWithSubspaceSimulation",
] ]

View File

@@ -1,281 +0,0 @@
import pdme.inputs
import pdme.model
import pdme.measurement.input_types
import pdme.measurement.oscillating_dipole
import pdme.util.fast_v_calc
import pdme.util.fast_nonlocal_spectrum
from typing import Sequence, Tuple, List
import datetime
import csv
import multiprocessing
import logging
import numpy
# TODO: remove hardcode
CHUNKSIZE = 50
# TODO: It's garbage to have this here duplicated from pdme.
DotInput = Tuple[numpy.typing.ArrayLike, float]
_logger = logging.getLogger(__name__)
def get_a_result(input) -> int:
model, dot_inputs, lows, highs, monte_carlo_count, max_frequency, seed = input
rng = numpy.random.default_rng(seed)
sample_dipoles = model.get_monte_carlo_dipole_inputs(
monte_carlo_count, max_frequency, rng_to_use=rng
)
vals = pdme.util.fast_v_calc.fast_vs_for_dipoleses(dot_inputs, sample_dipoles)
return numpy.count_nonzero(pdme.util.fast_v_calc.between(vals, lows, highs))
def get_a_result_using_pairs(input) -> int:
(
model,
dot_inputs,
pair_inputs,
local_lows,
local_highs,
nonlocal_lows,
nonlocal_highs,
monte_carlo_count,
max_frequency,
) = input
sample_dipoles = model.get_n_single_dipoles(monte_carlo_count, max_frequency)
local_vals = pdme.util.fast_v_calc.fast_vs_for_dipoles(dot_inputs, sample_dipoles)
local_matches = pdme.util.fast_v_calc.between(local_vals, local_lows, local_highs)
nonlocal_vals = pdme.util.fast_nonlocal_spectrum.fast_s_nonlocal(
pair_inputs, sample_dipoles
)
nonlocal_matches = pdme.util.fast_v_calc.between(
nonlocal_vals, nonlocal_lows, nonlocal_highs
)
combined_matches = numpy.logical_and(local_matches, nonlocal_matches)
return numpy.count_nonzero(combined_matches)
class BayesRun:
"""
A single Bayes run for a given set of dots.
Parameters
----------
dot_inputs : Sequence[DotInput]
The dot inputs for this bayes run.
models_with_names : Sequence[Tuple(str, pdme.model.DipoleModel)]
The models to evaluate.
actual_model : pdme.model.DipoleModel
The model which is actually correct.
filename_slug : str
The filename slug to include.
run_count: int
The number of runs to do.
"""
def __init__(
self,
dot_positions: Sequence[numpy.typing.ArrayLike],
frequency_range: Sequence[float],
models_with_names: Sequence[Tuple[str, pdme.model.DipoleModel]],
actual_model: pdme.model.DipoleModel,
filename_slug: str,
run_count: int = 100,
low_error: float = 0.9,
high_error: float = 1.1,
monte_carlo_count: int = 10000,
monte_carlo_cycles: int = 10,
target_success: int = 100,
max_monte_carlo_cycles_steps: int = 10,
max_frequency: float = 20,
end_threshold: float = None,
chunksize: int = CHUNKSIZE,
) -> None:
self.dot_inputs = pdme.inputs.inputs_with_frequency_range(
dot_positions, frequency_range
)
self.dot_inputs_array = pdme.measurement.input_types.dot_inputs_to_array(
self.dot_inputs
)
self.models = [model for (_, model) in models_with_names]
self.model_names = [name for (name, _) in models_with_names]
self.actual_model = actual_model
self.n: int
try:
self.n = self.actual_model.n # type: ignore
except AttributeError:
self.n = 1
self.model_count = len(self.models)
self.monte_carlo_count = monte_carlo_count
self.monte_carlo_cycles = monte_carlo_cycles
self.target_success = target_success
self.max_monte_carlo_cycles_steps = max_monte_carlo_cycles_steps
self.run_count = run_count
self.low_error = low_error
self.high_error = high_error
self.csv_fields = []
for i in range(self.n):
self.csv_fields.extend(
[
f"dipole_moment_{i+1}",
f"dipole_location_{i+1}",
f"dipole_frequency_{i+1}",
]
)
self.compensate_zeros = True
self.chunksize = chunksize
for name in self.model_names:
self.csv_fields.extend([f"{name}_success", f"{name}_count", f"{name}_prob"])
self.probabilities = [1 / self.model_count] * self.model_count
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
self.filename = f"{timestamp}-{filename_slug}.bayesrun.csv"
self.max_frequency = max_frequency
if end_threshold is not None:
if 0 < end_threshold < 1:
self.end_threshold: float = end_threshold
self.use_end_threshold = True
_logger.info(f"Will abort early, at {self.end_threshold}.")
else:
raise ValueError(
f"end_threshold should be between 0 and 1, but is actually {end_threshold}"
)
def go(self) -> None:
with open(self.filename, "a", newline="") as outfile:
writer = csv.DictWriter(outfile, fieldnames=self.csv_fields, dialect="unix")
writer.writeheader()
for run in range(1, self.run_count + 1):
# Generate the actual dipoles
actual_dipoles = self.actual_model.get_dipoles(self.max_frequency)
dots = actual_dipoles.get_percent_range_dot_measurements(
self.dot_inputs, self.low_error, self.high_error
)
(
lows,
highs,
) = pdme.measurement.input_types.dot_range_measurements_low_high_arrays(
dots
)
_logger.info(f"Going to work on dipole at {actual_dipoles.dipoles}")
# define a new seed sequence for each run
seed_sequence = numpy.random.SeedSequence(run)
results = []
_logger.debug("Going to iterate over models now")
for model_count, model in enumerate(self.models):
_logger.debug(f"Doing model #{model_count}")
core_count = multiprocessing.cpu_count() - 1 or 1
with multiprocessing.Pool(core_count) as pool:
cycle_count = 0
cycle_success = 0
cycles = 0
while (cycles < self.max_monte_carlo_cycles_steps) and (
cycle_success <= self.target_success
):
_logger.debug(f"Starting cycle {cycles}")
cycles += 1
current_success = 0
cycle_count += self.monte_carlo_count * self.monte_carlo_cycles
# generate a seed from the sequence for each core.
# note this needs to be inside the loop for monte carlo cycle steps!
# that way we get more stuff.
seeds = seed_sequence.spawn(self.monte_carlo_cycles)
current_success = sum(
pool.imap_unordered(
get_a_result,
[
(
model,
self.dot_inputs_array,
lows,
highs,
self.monte_carlo_count,
self.max_frequency,
seed,
)
for seed in seeds
],
self.chunksize,
)
)
cycle_success += current_success
_logger.debug(f"current running successes: {cycle_success}")
results.append((cycle_count, cycle_success))
_logger.debug("Done, constructing output now")
row = {
"dipole_moment_1": actual_dipoles.dipoles[0].p,
"dipole_location_1": actual_dipoles.dipoles[0].s,
"dipole_frequency_1": actual_dipoles.dipoles[0].w,
}
for i in range(1, self.n):
try:
current_dipoles = actual_dipoles.dipoles[i]
row[f"dipole_moment_{i+1}"] = current_dipoles.p
row[f"dipole_location_{i+1}"] = current_dipoles.s
row[f"dipole_frequency_{i+1}"] = current_dipoles.w
except IndexError:
_logger.info(f"Not writing anymore, saw end after {i}")
break
successes: List[float] = []
counts: List[int] = []
for model_index, (name, (count, result)) in enumerate(
zip(self.model_names, results)
):
row[f"{name}_success"] = result
row[f"{name}_count"] = count
successes.append(max(result, 0.5))
counts.append(count)
success_weight = sum(
[
(succ / count) * prob
for succ, count, prob in zip(successes, counts, self.probabilities)
]
)
new_probabilities = [
(succ / count) * old_prob / success_weight
for succ, count, old_prob in zip(successes, counts, self.probabilities)
]
self.probabilities = new_probabilities
for name, probability in zip(self.model_names, self.probabilities):
row[f"{name}_prob"] = probability
_logger.info(row)
with open(self.filename, "a", newline="") as outfile:
writer = csv.DictWriter(
outfile, fieldnames=self.csv_fields, dialect="unix"
)
writer.writerow(row)
if self.use_end_threshold:
max_prob = max(self.probabilities)
if max_prob > self.end_threshold:
_logger.info(
f"Aborting early, because {max_prob} is greater than {self.end_threshold}"
)
break

View File

@@ -1,382 +0,0 @@
import pdme.inputs
import pdme.model
import pdme.measurement.input_types
import pdme.measurement.oscillating_dipole
import pdme.util.fast_v_calc
import pdme.util.fast_nonlocal_spectrum
from typing import Sequence, Tuple, List
import datetime
import csv
import multiprocessing
import logging
import numpy
import numpy.random
# TODO: remove hardcode
CHUNKSIZE = 50
# TODO: It's garbage to have this here duplicated from pdme.
DotInput = Tuple[numpy.typing.ArrayLike, float]
_logger = logging.getLogger(__name__)
def get_a_simul_result_using_pairs(input) -> numpy.ndarray:
(
model,
dot_inputs,
pair_inputs,
local_lows,
local_highs,
nonlocal_lows,
nonlocal_highs,
monte_carlo_count,
monte_carlo_cycles,
max_frequency,
seed,
) = input
rng = numpy.random.default_rng(seed)
local_total = 0
combined_total = 0
sample_dipoles = model.get_monte_carlo_dipole_inputs(
monte_carlo_count, max_frequency, rng_to_use=rng
)
local_vals = pdme.util.fast_v_calc.fast_vs_for_dipoleses(dot_inputs, sample_dipoles)
local_matches = pdme.util.fast_v_calc.between(local_vals, local_lows, local_highs)
nonlocal_vals = pdme.util.fast_nonlocal_spectrum.fast_s_nonlocal_dipoleses(
pair_inputs, sample_dipoles
)
nonlocal_matches = pdme.util.fast_v_calc.between(
nonlocal_vals, nonlocal_lows, nonlocal_highs
)
combined_matches = numpy.logical_and(local_matches, nonlocal_matches)
local_total += numpy.count_nonzero(local_matches)
combined_total += numpy.count_nonzero(combined_matches)
return numpy.array([local_total, combined_total])
class BayesRunSimulPairs:
"""
A dual pairs-nonpairs Bayes run for a given set of dots.
Parameters
----------
dot_inputs : Sequence[DotInput]
The dot inputs for this bayes run.
models_with_names : Sequence[Tuple(str, pdme.model.DipoleModel)]
The models to evaluate.
actual_model : pdme.model.DipoleModel
The modoel for the model which is actually correct.
filename_slug : str
The filename slug to include.
run_count: int
The number of runs to do.
"""
def __init__(
self,
dot_positions: Sequence[numpy.typing.ArrayLike],
frequency_range: Sequence[float],
models_with_names: Sequence[Tuple[str, pdme.model.DipoleModel]],
actual_model: pdme.model.DipoleModel,
filename_slug: str,
run_count: int = 100,
low_error: float = 0.9,
high_error: float = 1.1,
pairs_high_error=None,
pairs_low_error=None,
monte_carlo_count: int = 10000,
monte_carlo_cycles: int = 10,
target_success: int = 100,
max_monte_carlo_cycles_steps: int = 10,
max_frequency: float = 20,
end_threshold: float = None,
chunksize: int = CHUNKSIZE,
) -> None:
self.dot_inputs = pdme.inputs.inputs_with_frequency_range(
dot_positions, frequency_range
)
self.dot_inputs_array = pdme.measurement.input_types.dot_inputs_to_array(
self.dot_inputs
)
self.dot_pair_inputs = pdme.inputs.input_pairs_with_frequency_range(
dot_positions, frequency_range
)
self.dot_pair_inputs_array = (
pdme.measurement.input_types.dot_pair_inputs_to_array(self.dot_pair_inputs)
)
self.models = [mod for (_, mod) in models_with_names]
self.model_names = [name for (name, _) in models_with_names]
self.actual_model = actual_model
self.n: int
try:
self.n = self.actual_model.n # type: ignore
except AttributeError:
self.n = 1
self.model_count = len(self.models)
self.monte_carlo_count = monte_carlo_count
self.monte_carlo_cycles = monte_carlo_cycles
self.target_success = target_success
self.max_monte_carlo_cycles_steps = max_monte_carlo_cycles_steps
self.run_count = run_count
self.low_error = low_error
self.high_error = high_error
if pairs_low_error is None:
self.pairs_low_error = self.low_error
else:
self.pairs_low_error = pairs_low_error
if pairs_high_error is None:
self.pairs_high_error = self.high_error
else:
self.pairs_high_error = pairs_high_error
self.csv_fields = []
for i in range(self.n):
self.csv_fields.extend(
[
f"dipole_moment_{i+1}",
f"dipole_location_{i+1}",
f"dipole_frequency_{i+1}",
]
)
self.compensate_zeros = True
self.chunksize = chunksize
for name in self.model_names:
self.csv_fields.extend([f"{name}_success", f"{name}_count", f"{name}_prob"])
self.probabilities_no_pairs = [1 / self.model_count] * self.model_count
self.probabilities_pairs = [1 / self.model_count] * self.model_count
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
self.filename_pairs = f"{timestamp}-{filename_slug}.simulpairs.yespairs.csv"
self.filename_no_pairs = f"{timestamp}-{filename_slug}.simulpairs.noopairs.csv"
self.max_frequency = max_frequency
if end_threshold is not None:
if 0 < end_threshold < 1:
self.end_threshold: float = end_threshold
self.use_end_threshold = True
_logger.info(f"Will abort early, at {self.end_threshold}.")
else:
raise ValueError(
f"end_threshold should be between 0 and 1, but is actually {end_threshold}"
)
def go(self) -> None:
with open(self.filename_pairs, "a", newline="") as outfile:
writer = csv.DictWriter(outfile, fieldnames=self.csv_fields, dialect="unix")
writer.writeheader()
with open(self.filename_no_pairs, "a", newline="") as outfile:
writer = csv.DictWriter(outfile, fieldnames=self.csv_fields, dialect="unix")
writer.writeheader()
for run in range(1, self.run_count + 1):
# Generate the actual dipoles
actual_dipoles = self.actual_model.get_dipoles(self.max_frequency)
dots = actual_dipoles.get_percent_range_dot_measurements(
self.dot_inputs, self.low_error, self.high_error
)
(
lows,
highs,
) = pdme.measurement.input_types.dot_range_measurements_low_high_arrays(
dots
)
pair_lows, pair_highs = (None, None)
pair_measurements = actual_dipoles.get_percent_range_dot_pair_measurements(
self.dot_pair_inputs, self.pairs_low_error, self.pairs_high_error
)
(
pair_lows,
pair_highs,
) = pdme.measurement.input_types.dot_range_measurements_low_high_arrays(
pair_measurements
)
_logger.info(f"Going to work on dipole at {actual_dipoles.dipoles}")
# define a new seed sequence for each run
seed_sequence = numpy.random.SeedSequence(run)
results_pairs = []
results_no_pairs = []
_logger.debug("Going to iterate over models now")
for model_count, model in enumerate(self.models):
_logger.debug(f"Doing model #{model_count}")
core_count = multiprocessing.cpu_count() - 1 or 1
with multiprocessing.Pool(core_count) as pool:
cycle_count = 0
cycle_success_pairs = 0
cycle_success_no_pairs = 0
cycles = 0
while (cycles < self.max_monte_carlo_cycles_steps) and (
min(cycle_success_pairs, cycle_success_no_pairs)
<= self.target_success
):
_logger.debug(f"Starting cycle {cycles}")
cycles += 1
current_success_pairs = 0
current_success_no_pairs = 0
cycle_count += self.monte_carlo_count * self.monte_carlo_cycles
# generate a seed from the sequence for each core.
# note this needs to be inside the loop for monte carlo cycle steps!
# that way we get more stuff.
seeds = seed_sequence.spawn(self.monte_carlo_cycles)
_logger.debug(f"Creating {self.monte_carlo_cycles} seeds")
current_success_both = numpy.array(
sum(
pool.imap_unordered(
get_a_simul_result_using_pairs,
[
(
model,
self.dot_inputs_array,
self.dot_pair_inputs_array,
lows,
highs,
pair_lows,
pair_highs,
self.monte_carlo_count,
self.monte_carlo_cycles,
self.max_frequency,
seed,
)
for seed in seeds
],
self.chunksize,
)
)
)
current_success_no_pairs = current_success_both[0]
current_success_pairs = current_success_both[1]
cycle_success_no_pairs += current_success_no_pairs
cycle_success_pairs += current_success_pairs
_logger.debug(
f"(pair, no_pair) successes are {(cycle_success_pairs, cycle_success_no_pairs)}"
)
results_pairs.append((cycle_count, cycle_success_pairs))
results_no_pairs.append((cycle_count, cycle_success_no_pairs))
_logger.debug("Done, constructing output now")
row_pairs = {
"dipole_moment_1": actual_dipoles.dipoles[0].p,
"dipole_location_1": actual_dipoles.dipoles[0].s,
"dipole_frequency_1": actual_dipoles.dipoles[0].w,
}
row_no_pairs = {
"dipole_moment_1": actual_dipoles.dipoles[0].p,
"dipole_location_1": actual_dipoles.dipoles[0].s,
"dipole_frequency_1": actual_dipoles.dipoles[0].w,
}
for i in range(1, self.n):
try:
current_dipoles = actual_dipoles.dipoles[i]
row_pairs[f"dipole_moment_{i+1}"] = current_dipoles.p
row_pairs[f"dipole_location_{i+1}"] = current_dipoles.s
row_pairs[f"dipole_frequency_{i+1}"] = current_dipoles.w
row_no_pairs[f"dipole_moment_{i+1}"] = current_dipoles.p
row_no_pairs[f"dipole_location_{i+1}"] = current_dipoles.s
row_no_pairs[f"dipole_frequency_{i+1}"] = current_dipoles.w
except IndexError:
_logger.info(f"Not writing anymore, saw end after {i}")
break
successes_pairs: List[float] = []
successes_no_pairs: List[float] = []
counts: List[int] = []
for model_index, (
name,
(count_pair, result_pair),
(count_no_pair, result_no_pair),
) in enumerate(zip(self.model_names, results_pairs, results_no_pairs)):
row_pairs[f"{name}_success"] = result_pair
row_pairs[f"{name}_count"] = count_pair
successes_pairs.append(max(result_pair, 0.5))
row_no_pairs[f"{name}_success"] = result_no_pair
row_no_pairs[f"{name}_count"] = count_no_pair
successes_no_pairs.append(max(result_no_pair, 0.5))
counts.append(count_pair)
success_weight_pair = sum(
[
(succ / count) * prob
for succ, count, prob in zip(
successes_pairs, counts, self.probabilities_pairs
)
]
)
success_weight_no_pair = sum(
[
(succ / count) * prob
for succ, count, prob in zip(
successes_no_pairs, counts, self.probabilities_no_pairs
)
]
)
new_probabilities_pair = [
(succ / count) * old_prob / success_weight_pair
for succ, count, old_prob in zip(
successes_pairs, counts, self.probabilities_pairs
)
]
new_probabilities_no_pair = [
(succ / count) * old_prob / success_weight_no_pair
for succ, count, old_prob in zip(
successes_no_pairs, counts, self.probabilities_no_pairs
)
]
self.probabilities_pairs = new_probabilities_pair
self.probabilities_no_pairs = new_probabilities_no_pair
for name, probability_pair, probability_no_pair in zip(
self.model_names, self.probabilities_pairs, self.probabilities_no_pairs
):
row_pairs[f"{name}_prob"] = probability_pair
row_no_pairs[f"{name}_prob"] = probability_no_pair
_logger.debug(row_pairs)
_logger.debug(row_no_pairs)
with open(self.filename_pairs, "a", newline="") as outfile:
writer = csv.DictWriter(
outfile, fieldnames=self.csv_fields, dialect="unix"
)
writer.writerow(row_pairs)
with open(self.filename_no_pairs, "a", newline="") as outfile:
writer = csv.DictWriter(
outfile, fieldnames=self.csv_fields, dialect="unix"
)
writer.writerow(row_no_pairs)
if self.use_end_threshold:
max_prob = min(
max(self.probabilities_pairs), max(self.probabilities_no_pairs)
)
if max_prob > self.end_threshold:
_logger.info(
f"Aborting early, because {max_prob} is greater than {self.end_threshold}"
)
break

View File

@@ -1,261 +0,0 @@
import deepdog.subset_simulation
import pdme.inputs
import pdme.model
import pdme.measurement.input_types
import pdme.measurement.oscillating_dipole
import pdme.util.fast_v_calc
import pdme.util.fast_nonlocal_spectrum
from typing import Sequence, Tuple, List, Optional
import datetime
import csv
import logging
import numpy
import numpy.typing
# TODO: remove hardcode
CHUNKSIZE = 50
# TODO: It's garbage to have this here duplicated from pdme.
DotInput = Tuple[numpy.typing.ArrayLike, float]
CLAMPING_FACTOR = 10
_logger = logging.getLogger(__name__)
class BayesRunWithSubspaceSimulation:
"""
A single Bayes run for a given set of dots.
Parameters
----------
dot_inputs : Sequence[DotInput]
The dot inputs for this bayes run.
models_with_names : Sequence[Tuple(str, pdme.model.DipoleModel)]
The models to evaluate.
actual_model : pdme.model.DipoleModel
The model which is actually correct.
filename_slug : str
The filename slug to include.
run_count: int
The number of runs to do.
"""
def __init__(
self,
dot_positions: Sequence[numpy.typing.ArrayLike],
frequency_range: Sequence[float],
models_with_names: Sequence[Tuple[str, pdme.model.DipoleModel]],
actual_model: pdme.model.DipoleModel,
filename_slug: str,
max_frequency: float = 20,
end_threshold: float = None,
run_count=100,
chunksize: int = CHUNKSIZE,
ss_n_c: int = 500,
ss_n_s: int = 100,
ss_m_max: int = 15,
ss_target_cost: Optional[float] = None,
ss_level_0_seed: int = 200,
ss_mcmc_seed: int = 20,
ss_use_adaptive_steps=True,
ss_default_phi_step=0.01,
ss_default_theta_step=0.01,
ss_default_r_step=0.01,
ss_default_w_log_step=0.01,
ss_default_upper_w_log_step=4,
ss_dump_last_generation=False,
ss_initial_costs_chunk_size=100,
write_output_to_bayesruncsv=True,
use_timestamp_for_output=True,
) -> None:
self.dot_inputs = pdme.inputs.inputs_with_frequency_range(
dot_positions, frequency_range
)
self.dot_inputs_array = pdme.measurement.input_types.dot_inputs_to_array(
self.dot_inputs
)
self.models_with_names = models_with_names
self.models = [model for (_, model) in models_with_names]
self.model_names = [name for (name, _) in models_with_names]
self.actual_model = actual_model
self.n: int
try:
self.n = self.actual_model.n # type: ignore
except AttributeError:
self.n = 1
self.model_count = len(self.models)
self.csv_fields = []
for i in range(self.n):
self.csv_fields.extend(
[
f"dipole_moment_{i+1}",
f"dipole_location_{i+1}",
f"dipole_frequency_{i+1}",
]
)
self.compensate_zeros = True
self.chunksize = chunksize
for name in self.model_names:
self.csv_fields.extend([f"{name}_likelihood", f"{name}_prob"])
self.probabilities = [1 / self.model_count] * self.model_count
if use_timestamp_for_output:
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
self.filename = f"{timestamp}-{filename_slug}.bayesrunwithss.csv"
else:
self.filename = f"{filename_slug}.bayesrunwithss.csv"
self.max_frequency = max_frequency
if end_threshold is not None:
if 0 < end_threshold < 1:
self.end_threshold: float = end_threshold
self.use_end_threshold = True
_logger.info(f"Will abort early, at {self.end_threshold}.")
else:
raise ValueError(
f"end_threshold should be between 0 and 1, but is actually {end_threshold}"
)
self.ss_n_c = ss_n_c
self.ss_n_s = ss_n_s
self.ss_m_max = ss_m_max
self.ss_target_cost = ss_target_cost
self.ss_level_0_seed = ss_level_0_seed
self.ss_mcmc_seed = ss_mcmc_seed
self.ss_use_adaptive_steps = ss_use_adaptive_steps
self.ss_default_phi_step = ss_default_phi_step
self.ss_default_theta_step = ss_default_theta_step
self.ss_default_r_step = ss_default_r_step
self.ss_default_w_log_step = ss_default_w_log_step
self.ss_default_upper_w_log_step = ss_default_upper_w_log_step
self.ss_dump_last_generation = ss_dump_last_generation
self.ss_initial_costs_chunk_size = ss_initial_costs_chunk_size
self.run_count = run_count
self.write_output_to_csv = write_output_to_bayesruncsv
def go(self) -> Sequence:
if self.write_output_to_csv:
with open(self.filename, "a", newline="") as outfile:
writer = csv.DictWriter(
outfile, fieldnames=self.csv_fields, dialect="unix"
)
writer.writeheader()
return_result = []
for run in range(1, self.run_count + 1):
# Generate the actual dipoles
actual_dipoles = self.actual_model.get_dipoles(self.max_frequency)
measurements = actual_dipoles.get_dot_measurements(self.dot_inputs)
_logger.info(f"Going to work on dipole at {actual_dipoles.dipoles}")
# define a new seed sequence for each run
results = []
_logger.debug("Going to iterate over models now")
for model_count, model in enumerate(self.models_with_names):
_logger.debug(f"Doing model #{model_count}, {model[0]}")
subset_run = deepdog.subset_simulation.SubsetSimulation(
model,
self.dot_inputs,
measurements,
self.ss_n_c,
self.ss_n_s,
self.ss_m_max,
self.ss_target_cost,
self.ss_level_0_seed,
self.ss_mcmc_seed,
self.ss_use_adaptive_steps,
self.ss_default_phi_step,
self.ss_default_theta_step,
self.ss_default_r_step,
self.ss_default_w_log_step,
self.ss_default_upper_w_log_step,
initial_cost_chunk_size=self.ss_initial_costs_chunk_size,
keep_probs_list=False,
dump_last_generation_to_file=self.ss_dump_last_generation,
)
results.append(subset_run.execute())
_logger.debug("Done, constructing output now")
row = {
"dipole_moment_1": actual_dipoles.dipoles[0].p,
"dipole_location_1": actual_dipoles.dipoles[0].s,
"dipole_frequency_1": actual_dipoles.dipoles[0].w,
}
for i in range(1, self.n):
try:
current_dipoles = actual_dipoles.dipoles[i]
row[f"dipole_moment_{i+1}"] = current_dipoles.p
row[f"dipole_location_{i+1}"] = current_dipoles.s
row[f"dipole_frequency_{i+1}"] = current_dipoles.w
except IndexError:
_logger.info(f"Not writing anymore, saw end after {i}")
break
likelihoods: List[float] = []
for (name, result) in zip(self.model_names, results):
if result.over_target_likelihood is None:
if result.lowest_likelihood is None:
_logger.error(f"result {result} looks bad")
clamped_likelihood = 10**-15
else:
clamped_likelihood = result.lowest_likelihood / CLAMPING_FACTOR
_logger.warning(
f"got a none result, clamping to {clamped_likelihood}"
)
else:
clamped_likelihood = result.over_target_likelihood
likelihoods.append(clamped_likelihood)
row[f"{name}_likelihood"] = clamped_likelihood
success_weight = sum(
[
likelihood * prob
for likelihood, prob in zip(likelihoods, self.probabilities)
]
)
new_probabilities = [
likelihood * old_prob / success_weight
for likelihood, old_prob in zip(likelihoods, self.probabilities)
]
self.probabilities = new_probabilities
for name, probability in zip(self.model_names, self.probabilities):
row[f"{name}_prob"] = probability
_logger.info(row)
return_result.append(row)
if self.write_output_to_csv:
with open(self.filename, "a", newline="") as outfile:
writer = csv.DictWriter(
outfile, fieldnames=self.csv_fields, dialect="unix"
)
writer.writerow(row)
if self.use_end_threshold:
max_prob = max(self.probabilities)
if max_prob > self.end_threshold:
_logger.info(
f"Aborting early, because {max_prob} is greater than {self.end_threshold}"
)
break
return return_result

View File

@@ -14,6 +14,8 @@ import multiprocessing
_logger = logging.getLogger(__name__) _logger = logging.getLogger(__name__)
ANTI_ZERO_SUCCESS_THRES = 0.1
@dataclass @dataclass
class DirectMonteCarloResult: class DirectMonteCarloResult:
@@ -35,6 +37,7 @@ class DirectMonteCarloConfig:
cap_core_count: int = 0 # 0 means cap at num cores - 1 cap_core_count: int = 0 # 0 means cap at num cores - 1
chunk_size: int = 50 chunk_size: int = 50
write_bayesrun_file = True write_bayesrun_file = True
bayesrun_file_timestamp = True
# chunk size of some kind # chunk size of some kind
@@ -282,9 +285,14 @@ class DirectMonteCarloRun:
if self.config.write_bayesrun_file: if self.config.write_bayesrun_file:
if self.config.bayesrun_file_timestamp:
timestamp_str = f"{timestamp}-"
else:
timestamp_str = ""
filename = ( filename = (
f"{timestamp}-{self.config.tag}.realdata.fast_filter.bayesrun.csv" f"{timestamp_str}{self.config.tag}.realdata.fast_filter.bayesrun.csv"
) )
_logger.info(f"Going to write to file [{filename}]") _logger.info(f"Going to write to file [{filename}]")
# row: Dict[str, Union[int, float, str]] = {} # row: Dict[str, Union[int, float, str]] = {}
row = {} row = {}
@@ -292,7 +300,11 @@ class DirectMonteCarloRun:
num_models = len(self.model_name_pairs) num_models = len(self.model_name_pairs)
success_weight = sum( success_weight = sum(
[ [
(res.successes / res.monte_carlo_count) / num_models (
max(ANTI_ZERO_SUCCESS_THRES, res.successes)
/ res.monte_carlo_count
)
/ num_models
for res in results for res in results
] ]
) )
@@ -303,7 +315,8 @@ class DirectMonteCarloRun:
f"{res.model_name}_success": res.successes, f"{res.model_name}_success": res.successes,
f"{res.model_name}_count": res.monte_carlo_count, f"{res.model_name}_count": res.monte_carlo_count,
f"{res.model_name}_prob": ( f"{res.model_name}_prob": (
res.successes / res.monte_carlo_count max(ANTI_ZERO_SUCCESS_THRES, res.successes)
/ res.monte_carlo_count
) )
/ (num_models * success_weight), / (num_models * success_weight),
} }

View File

@@ -54,109 +54,13 @@ class SingleDotSpinQubitFrequencyFilter(DirectMonteCarloFilter):
self.measurements self.measurements
) )
# oh no not this again
def fast_s_spin_qubit_tarucha_apsd_dipoleses(
self, dot_inputs: numpy.ndarray, dipoleses: numpy.ndarray
) -> numpy.ndarray:
"""
No error correction here baby.
"""
# We're going to annotate the indices on this class.
# Let's define some indices:
# A -> index of dipoleses configurations
# j -> within a particular configuration, indexes dipole j
# measurement_index -> if we have 100 frequencies for example, indexes which one of them it is
# If we need to use numbers, let's use A -> 2, j -> 10, measurement_index -> 9 for consistency with
# my other notes
# axes are [dipole_config_idx A, dipole_idx j, {px, py, pz}3]
ps = dipoleses[:, :, 0:3]
# axes are [dipole_config_idx A, dipole_idx j, {sx, sy, sz}3]
ss = dipoleses[:, :, 3:6]
# axes are [dipole_config_idx A, dipole_idx j, w], last axis is just 1
ws = dipoleses[:, :, 6]
# dot_index is either 0 or 1 for dot1 or dot2
# hopefully this adhoc grammar is making sense, with the explicit labelling of the values of the last axis in cartesian space
# axes are [measurement_idx, {dot_index}, {rx, ry, rz}] where the inner {dot_index} is gone
# [measurement_idx, cartesian3]
rs = dot_inputs[:, 0:3]
# axes are [measurement_idx]
fs = dot_inputs[:, 3]
# first operation!
# r1s has shape [measurement_idx, rxs]
# None inserts an extra axis so the r1s[:, None] has shape
# [measurement_idx, 1]([rxs]) with the last rxs hidden
#
# ss has shape [ A, j, {sx, sy, sz}3], so second term has shape [A, 1, j]([sxs])
# these broadcast from right to left
# [ measurement_idx, 1, rxs]
# [A, 1, j, sxs]
# resulting in [A, measurement_idx, j, cart3] sxs rxs are both cart3
diffses = rs[:, None] - ss[:, None, :]
# norms takes out axis 3, the last one, giving [A, measurement_idx, j]
norms = numpy.linalg.norm(diffses, axis=3)
# _logger.info(f"norms1: {norms1}")
# _logger.info(f"norms1 shape: {norms1.shape}")
#
# diffses1 (A, measurement_idx, j, xs)
# ps: (A, j, px)
# result is (A, measurement_idx, j)
# intermediate_dot_prod = numpy.einsum("abcd,acd->abc", diffses1, ps)
# _logger.info(f"dot product shape: {intermediate_dot_prod.shape}")
# transpose makes it (j, measurement_idx, A)
# transp_intermediate_dot_prod = numpy.transpose(numpy.einsum("abcd,acd->abc", diffses1, ps) / (norms1**3))
# transpose of diffses has shape (xs, j, measurement_idx, A)
# numpy.transpose(diffses1)
# _logger.info(f"dot product shape: {transp_intermediate_dot_prod.shape}")
# inner transpose is (j, measurement_idx, A) * (xs, j, measurement_idx, A)
# next transpose puts it back to (A, measurement_idx, j, xs)
# p_dot_r_times_r_term = 3 * numpy.transpose(numpy.transpose(numpy.einsum("abcd,acd->abc", diffses1, ps) / (norms1**3)) * numpy.transpose(diffses1))
# _logger.info(f"p_dot_r_times_r_term: {p_dot_r_times_r_term.shape}")
# only x axis puts us at (A, measurement_idx, j)
# p_dot_r_times_r_term_x_only = p_dot_r_times_r_term[:, :, :, 0]
# _logger.info(f"p_dot_r_times_r_term_x_only.shape: {p_dot_r_times_r_term_x_only.shape}")
# now to complete the numerator we subtract the ps, which are (A, j, px):
# slicing off the end gives us (A, j), so we newaxis to get (A, 1, j)
# _logger.info(ps[:, numpy.newaxis, :, 0].shape)
alphses = (
(
3
* numpy.transpose(
numpy.transpose(
numpy.einsum("abcd,acd->abc", diffses, ps) / (norms**2)
)
* numpy.transpose(diffses)
)[:, :, :, 0]
)
- ps[:, numpy.newaxis, :, 0]
) / (norms**3)
bses = (
2
* numpy.pi
* ws[:, None, :]
/ ((2 * numpy.pi * fs[:, None]) ** 2 + 4 * ws[:, None, :] ** 2)
)
return numpy.einsum("...j->...", alphses * alphses * bses)
def filter_samples(self, samples: ndarray) -> ndarray: def filter_samples(self, samples: ndarray) -> ndarray:
current_sample = samples current_sample = samples
for di, low, high in zip(self.dot_inputs_array, self.lows, self.highs): for di, low, high in zip(self.dot_inputs_array, self.lows, self.highs):
if len(current_sample) < 1: if len(current_sample) < 1:
break break
vals = self.fast_s_spin_qubit_tarucha_apsd_dipoleses( vals = pdme.util.fast_v_calc.fast_efieldxs_for_dipoleses(
numpy.array([di]), current_sample numpy.array([di]), current_sample
) )
# _logger.info(vals) # _logger.info(vals)

View File

@@ -31,10 +31,10 @@ class Indexifier:
def __init__(self, list_dict: typing.Dict[str, typing.Sequence]): def __init__(self, list_dict: typing.Dict[str, typing.Sequence]):
self.dict = list_dict self.dict = list_dict
self.product_dict = _dict_product(self.dict)
def indexify(self, n: int) -> typing.Dict[str, typing.Any]: def indexify(self, n: int) -> typing.Dict[str, typing.Any]:
product_dict = _dict_product(self.dict) return self.product_dict[n]
return product_dict[n]
def _indexify_indices(self, n: int) -> typing.Sequence[int]: def _indexify_indices(self, n: int) -> typing.Sequence[int]:
""" """

View File

@@ -8,17 +8,30 @@ import csv
_logger = logging.getLogger(__name__) _logger = logging.getLogger(__name__)
FILENAME_REGEX = r"(?P<timestamp>\d{8}-\d{6})-(?P<filename_slug>.*)\.realdata\.fast_filter\.bayesrun\.csv" FILENAME_REGEX = re.compile(
r"(?P<timestamp>\d{8}-\d{6})-(?P<filename_slug>.*)\.realdata\.fast_filter\.bayesrun\.csv"
)
MODEL_REGEXES = [ MODEL_REGEXES = [
r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)" re.compile(pattern)
for pattern in [
r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
r"geom_(?P<xmin>-?\d*\.?\d+)_(?P<xmax>-?\d*\.?\d+)_(?P<ymin>-?\d*\.?\d+)_(?P<ymax>-?\d*\.?\d+)_(?P<zmin>-?\d*\.?\d+)_(?P<zmax>-?\d*\.?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
]
] ]
FILE_SLUG_REGEXES = [ FILE_SLUG_REGEXES = [
r"mock_tarucha-(?P<job_index>\d+)", re.compile(pattern)
r"(?:(?P<mock>mock)_)?tarucha(?:_(?P<tarucha_run_id>\d+))?-(?P<job_index>\d+)", for pattern in [
r"(?P<tag>\w+)-(?P<job_index>\d+)",
r"mock_tarucha-(?P<job_index>\d+)",
r"(?:(?P<mock>mock)_)?tarucha(?:_(?P<tarucha_run_id>\d+))?-(?P<job_index>\d+)",
]
] ]
SIMPLE_TAG_REGEX = re.compile(r"\w+-\d+")
@dataclasses.dataclass @dataclasses.dataclass
class BayesrunOutputFilename: class BayesrunOutputFilename:
@@ -27,7 +40,6 @@ class BayesrunOutputFilename:
path: pathlib.Path path: pathlib.Path
@dataclasses.dataclass
class BayesrunColumnParsed: class BayesrunColumnParsed:
""" """
class for parsing a bayesrun while pulling certain special fields out class for parsing a bayesrun while pulling certain special fields out
@@ -38,10 +50,21 @@ class BayesrunColumnParsed:
self.model_field_dict = { self.model_field_dict = {
k: v for k, v in groupdict.items() if k != "field_name" k: v for k, v in groupdict.items() if k != "field_name"
} }
self._groupdict_str = repr(groupdict)
def __str__(self): def __str__(self):
return f"BayesrunColumnParsed[{self.column_field}: {self.model_field_dict}]" return f"BayesrunColumnParsed[{self.column_field}: {self.model_field_dict}]"
def __repr__(self):
return f"BayesrunColumnParsed({self._groupdict_str})"
def __eq__(self, other):
if isinstance(other, BayesrunColumnParsed):
return (self.column_field == other.column_field) and (
self.model_field_dict == other.model_field_dict
)
return NotImplemented
@dataclasses.dataclass @dataclasses.dataclass
class BayesrunModelResult: class BayesrunModelResult:
@@ -73,7 +96,7 @@ def _parse_bayesrun_column(
Returns the groupdict for the first match, or None if no match found. Returns the groupdict for the first match, or None if no match found.
""" """
for pattern in MODEL_REGEXES: for pattern in MODEL_REGEXES:
match = re.match(pattern, column) match = pattern.match(column)
if match: if match:
return BayesrunColumnParsed(match.groupdict()) return BayesrunColumnParsed(match.groupdict())
else: else:
@@ -112,7 +135,7 @@ def _parse_bayesrun_row(
def _parse_output_filename(file: pathlib.Path) -> BayesrunOutputFilename: def _parse_output_filename(file: pathlib.Path) -> BayesrunOutputFilename:
filename = file.name filename = file.name
match = re.match(FILENAME_REGEX, filename) match = FILENAME_REGEX.match(filename)
if not match: if not match:
raise ValueError(f"{filename} was not a valid bayesrun output") raise ValueError(f"{filename} was not a valid bayesrun output")
groups = match.groupdict() groups = match.groupdict()
@@ -123,7 +146,7 @@ def _parse_output_filename(file: pathlib.Path) -> BayesrunOutputFilename:
def _parse_file_slug(slug: str) -> typing.Optional[typing.Dict[str, str]]: def _parse_file_slug(slug: str) -> typing.Optional[typing.Dict[str, str]]:
for pattern in FILE_SLUG_REGEXES: for pattern in FILE_SLUG_REGEXES:
match = re.match(pattern, slug) match = pattern.match(slug)
if match: if match:
return match.groupdict() return match.groupdict()
else: else:

View File

@@ -36,6 +36,7 @@
self.packages.${system}.deepdogEnv self.packages.${system}.deepdogEnv
self.packages.${system}.deepdogApp self.packages.${system}.deepdogApp
pkgs.just pkgs.just
pkgs.nodejs
]; ];
shellHook = '' shellHook = ''
export DO_NIX_CUSTOM=1 export DO_NIX_CUSTOM=1

8
poetry.lock generated
View File

@@ -786,13 +786,13 @@ files = [
[[package]] [[package]]
name = "pdme" name = "pdme"
version = "1.0.0" version = "1.2.0"
description = "Python dipole model evaluator" description = "Python dipole model evaluator"
optional = false optional = false
python-versions = "<3.10,>=3.8.1" python-versions = "<3.10,>=3.8.1"
files = [ files = [
{file = "pdme-1.0.0-py3-none-any.whl", hash = "sha256:8fb8d1bf3d88f73118da5731332ae00c721b98daf53b225069e422af1a1a67f2"}, {file = "pdme-1.2.0-py3-none-any.whl", hash = "sha256:602710a053f22921b4adbc03d46d284149fe2367a65455cde56608708e01c84b"},
{file = "pdme-1.0.0.tar.gz", hash = "sha256:02cabf2e6fc2ddaf0871d0b3afcf265bca16520ee7bc1c74672be62f7a8390bd"}, {file = "pdme-1.2.0.tar.gz", hash = "sha256:412806d7ae384c048515e0f2cba70252778bf153800829a1d3265a0596872263"},
] ]
[package.dependencies] [package.dependencies]
@@ -1275,4 +1275,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = ">=3.8.1,<3.10" python-versions = ">=3.8.1,<3.10"
content-hash = "a28054e255cbd49396795127380c2b7a0cfd742b15cba2184322f3c4894ed041" content-hash = "918b6736766a9c1b6732a56e1ef2e7a53241f2e25babb884881e49c299801fc9"

View File

@@ -1,12 +1,12 @@
[tool.poetry] [tool.poetry]
name = "deepdog" name = "deepdog"
version = "1.0.0" version = "1.2.1"
description = "" description = ""
authors = ["Deepak Mallubhotla <dmallubhotla+github@gmail.com>"] authors = ["Deepak Mallubhotla <dmallubhotla+github@gmail.com>"]
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = ">=3.8.1,<3.10" python = ">=3.8.1,<3.10"
pdme = "^1.0.0" pdme = "^1.2.0"
numpy = "1.22.3" numpy = "1.22.3"
scipy = "1.10" scipy = "1.10"
tqdm = "^4.66.2" tqdm = "^4.66.2"

View File

View File

@@ -0,0 +1,137 @@
import pdme.measurement
import pdme.measurement.input_types
from pdme.model import (
LogSpacedRandomCountMultipleDipoleFixedMagnitudeModel,
LogSpacedRandomCountMultipleDipoleFixedMagnitudeXYModel,
LogSpacedRandomCountMultipleDipoleFixedMagnitudeFixedOrientationModel,
)
import deepdog.direct_monte_carlo.dmc_filters
import numpy.random
import numpy.testing
import logging
_logger = logging.getLogger(__name__)
def fixed_z_model_func(
xmin,
xmax,
ymin,
ymax,
zmin,
zmax,
wexp_min,
wexp_max,
pfixed,
n_max,
prob_occupancy,
):
return LogSpacedRandomCountMultipleDipoleFixedMagnitudeFixedOrientationModel(
xmin,
xmax,
ymin,
ymax,
zmin,
zmax,
wexp_min,
wexp_max,
pfixed,
0,
0,
n_max,
prob_occupancy,
)
def get_model(orientation):
model_funcs = {
"fixedz": fixed_z_model_func,
"free": LogSpacedRandomCountMultipleDipoleFixedMagnitudeModel,
"fixedxy": LogSpacedRandomCountMultipleDipoleFixedMagnitudeXYModel,
}
model = model_funcs[orientation](
-10,
10,
-17.5,
17.5,
5,
7.5,
-5,
6.5,
10**3,
2,
0.99999999,
)
model.n = 2
model.rng = numpy.random.default_rng(1234)
return (
f"connors_geom-5height-orientation_{orientation}-pfixexp_{3}-dipole_count_{2}",
model,
)
def test_electric_field_x_dmc_filter():
dipoles_raw = [
[(1, 2, 3), (4, 5, 6), 1],
[(-1, 5, 2), (6, 5, 4), 10],
]
dipoles = [
pdme.measurement.OscillatingDipole(numpy.array(d[0]), numpy.array(d[1]), d[2])
for d in dipoles_raw
]
_logger.debug(f"dipoles: {dipoles}")
dot_inputs_raw = [
([-1, -1, 0], 1),
([-1, -1, 0], 2),
([-1, -1, 0], 3),
([-1, -1, 0], 4),
]
dot_inputs_array = pdme.measurement.input_types.dot_inputs_to_array(dot_inputs_raw)
_logger.debug(f"dot_inputs_array: {dot_inputs_array}")
arrangement = pdme.measurement.OscillatingDipoleArrangement(dipoles)
measurements = []
for input in dot_inputs_raw:
ex = sum(
[
dipole.s_electric_fieldx_at_position(*input)
for dipole in arrangement.dipoles
]
)
ex_low = ex * 0.5
ex_high = ex * 1.5
meas = pdme.measurement.DotRangeMeasurement(ex_low, ex_high, input[0], input[1])
measurements.append(meas)
filter = deepdog.direct_monte_carlo.dmc_filters.SingleDotSpinQubitFrequencyFilter(
measurements
)
samples = numpy.array(
[
[
[1, 2, 3, 4, 5, 6, 1],
[-1, 5, 2, 6, 5, 4, 10],
],
[
[10, 20, 30, 40, 50, 60, 1],
[-1, 5, 2, 6, 5, 4, 1],
],
[
[1, 1, 1, 1, 1, 1, 1],
[2, 2, 2, 2, 2, 2, 1],
],
]
)
expected = samples[
0:1
] # only expect to see the first guy, because that's what generated our thing
filtered = filter.filter_samples(samples)
assert len(filtered) != len(samples), "Should have filtered some out!"
numpy.testing.assert_array_equal(
filtered, expected, "The filter should have only returned the first one"
)

View File

@@ -7,6 +7,7 @@ def test_parse_groupdict():
) )
parsed = deepdog.results._parse_bayesrun_column(example_column_name) parsed = deepdog.results._parse_bayesrun_column(example_column_name)
assert parsed is not None
expected = deepdog.results.BayesrunColumnParsed( expected = deepdog.results.BayesrunColumnParsed(
{ {
"xmin": "-20", "xmin": "-20",
@@ -23,6 +24,30 @@ def test_parse_groupdict():
assert parsed == expected assert parsed == expected
def test_parse_groupdict_with_magnitude():
example_column_name = (
"geom_-20_20_-10_10_0_5-magnitude_3.5-orientation_free-dipole_count_100_success"
)
parsed = deepdog.results._parse_bayesrun_column(example_column_name)
assert parsed is not None
expected = deepdog.results.BayesrunColumnParsed(
{
"xmin": "-20",
"xmax": "20",
"ymin": "-10",
"ymax": "10",
"zmin": "0",
"zmax": "5",
"orientation": "free",
"avg_filled": "100",
"log_magnitude": "3.5",
"field_name": "success",
}
)
assert parsed == expected
# def test_parse_no_match_column_name(): # def test_parse_no_match_column_name():
# parsed = deepdog.results.parse_bayesrun_column("There's nothing here") # parsed = deepdog.results.parse_bayesrun_column("There's nothing here")
# assert parsed is None # assert parsed is None

View File

@@ -1,158 +0,0 @@
import deepdog
import logging
import logging.config
import numpy.random
from pdme.model import (
LogSpacedRandomCountMultipleDipoleFixedMagnitudeModel,
LogSpacedRandomCountMultipleDipoleFixedMagnitudeXYModel,
LogSpacedRandomCountMultipleDipoleFixedMagnitudeFixedOrientationModel,
)
_logger = logging.getLogger(__name__)
def fixed_z_model_func(
xmin,
xmax,
ymin,
ymax,
zmin,
zmax,
wexp_min,
wexp_max,
pfixed,
n_max,
prob_occupancy,
):
return LogSpacedRandomCountMultipleDipoleFixedMagnitudeFixedOrientationModel(
xmin,
xmax,
ymin,
ymax,
zmin,
zmax,
wexp_min,
wexp_max,
pfixed,
0,
0,
n_max,
prob_occupancy,
)
def get_model(orientation):
model_funcs = {
"fixedz": fixed_z_model_func,
"free": LogSpacedRandomCountMultipleDipoleFixedMagnitudeModel,
"fixedxy": LogSpacedRandomCountMultipleDipoleFixedMagnitudeXYModel,
}
model = model_funcs[orientation](
-10,
10,
-17.5,
17.5,
5,
7.5,
-5,
6.5,
10**3,
2,
0.99999999,
)
model.n = 2
model.rng = numpy.random.default_rng(1234)
return (
f"connors_geom-5height-orientation_{orientation}-pfixexp_{3}-dipole_count_{2}",
model,
)
def test_basic_analysis(snapshot):
dot_positions = [[0, 0, 0], [0, 1, 0]]
freqs = [1, 10, 100]
models = []
orientations = ["free", "fixedxy", "fixedz"]
for orientation in orientations:
models.append(get_model(orientation))
_logger.info(f"have {len(models)} models to look at")
if len(models) == 1:
_logger.info(f"only one model, name: {models[0][0]}")
square_run = deepdog.BayesRunWithSubspaceSimulation(
dot_positions,
freqs,
models,
models[0][1],
filename_slug="test",
end_threshold=0.9,
ss_n_c=5,
ss_n_s=2,
ss_m_max=10,
ss_target_cost=150,
ss_level_0_seed=200,
ss_mcmc_seed=20,
ss_use_adaptive_steps=True,
ss_default_phi_step=0.01,
ss_default_theta_step=0.01,
ss_default_r_step=0.01,
ss_default_w_log_step=0.01,
ss_default_upper_w_log_step=4,
ss_dump_last_generation=False,
write_output_to_bayesruncsv=False,
ss_initial_costs_chunk_size=1000,
)
result = square_run.go()
assert result == snapshot
def test_bayesss_with_tighter_cost(snapshot):
dot_positions = [[0, 0, 0], [0, 1, 0]]
freqs = [1, 10, 100]
models = []
orientations = ["free", "fixedxy", "fixedz"]
for orientation in orientations:
models.append(get_model(orientation))
_logger.info(f"have {len(models)} models to look at")
if len(models) == 1:
_logger.info(f"only one model, name: {models[0][0]}")
square_run = deepdog.BayesRunWithSubspaceSimulation(
dot_positions,
freqs,
models,
models[0][1],
filename_slug="test",
end_threshold=0.9,
ss_n_c=5,
ss_n_s=2,
ss_m_max=10,
ss_target_cost=1.5,
ss_level_0_seed=200,
ss_mcmc_seed=20,
ss_use_adaptive_steps=True,
ss_default_phi_step=0.01,
ss_default_theta_step=0.01,
ss_default_r_step=0.01,
ss_default_w_log_step=0.01,
ss_default_upper_w_log_step=4,
ss_dump_last_generation=False,
write_output_to_bayesruncsv=False,
ss_initial_costs_chunk_size=1,
)
result = square_run.go()
assert result == snapshot