Compare commits

...

19 Commits

Author SHA1 Message Date
862eedf7bd chore(deps): update dependency pytest-cov to v4
Some checks failed
renovate/artifacts Artifact file update failure
gitea-physics/deepdog/pipeline/pr-master There was a failure building this commit
2023-07-25 01:31:04 +00:00
01c0d7e49b
chore(release): 0.7.2
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
gitea-physics/deepdog/pipeline/tag This commit looks good
2023-07-24 10:44:51 -05:00
a170a3ce01
fix: fixes clamping format etc.
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
2023-07-24 10:26:35 -05:00
9bb8fc50fe
feat: clamps results now
Some checks failed
gitea-physics/deepdog/pipeline/head There was a failure building this commit
2023-07-24 10:24:23 -05:00
f775ed34c6
chore(release): 0.7.1
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
gitea-physics/deepdog/pipeline/tag This commit looks good
2023-07-24 02:04:42 -05:00
7d0c2b22cc Merge pull request 'mcmc' (#32) from mcmc into master
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
Reviewed-on: #32
2023-07-24 07:02:19 +00:00
d6e6876a79
fmt: fixes some linting issues
All checks were successful
gitea-physics/deepdog/pipeline/pr-master This commit looks good
2023-07-24 01:59:07 -05:00
fccf50eb27
fmt: formatting improvements 2023-07-24 01:55:37 -05:00
33cab9ab41
feat: adds subset simulation stuff
Some checks failed
gitea-physics/deepdog/pipeline/head There was a failure building this commit
gitea-physics/deepdog/pipeline/pr-master There was a failure building this commit
2023-07-24 01:50:56 -05:00
ad521ba472
deps: upgrades pdme version to use mcmc code
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
2023-07-23 18:46:11 -05:00
266d6dd583
chore(release): 0.7.0
All checks were successful
gitea-physics/deepdog/pipeline/tag This commit looks good
gitea-physics/deepdog/pipeline/head This commit looks good
2023-05-01 10:26:01 -05:00
c573f8806d Merge pull request 'add_pairs' (#30) from add_pairs into master
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
Reviewed-on: #30
2023-05-01 15:24:57 +00:00
a015daf5ff
feat!: removes fastfilter parameter because it should never be needed
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
gitea-physics/deepdog/pipeline/pr-master This commit looks good
2023-05-01 10:17:12 -05:00
a089951bbe
feat: adds pair capability to real spectrum run hopefully
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
2023-05-01 10:05:46 -05:00
7568aef842
chore(release): 0.6.7
All checks were successful
gitea-physics/deepdog/pipeline/tag This commit looks good
gitea-physics/deepdog/pipeline/head This commit looks good
2023-04-13 20:26:06 -05:00
c4b6cbbb6f Merge pull request 'cap_core' (#29) from cap_core into master
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
Reviewed-on: #29
2023-04-14 01:24:01 +00:00
1cf4454153
fix: avoids redefinition of core count in loop
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
gitea-physics/deepdog/pipeline/pr-master This commit looks good
2023-04-13 20:21:17 -05:00
bf15f4a7b7
feat: adds option to cap core count for real spectrum run
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
2023-04-13 20:17:48 -05:00
12903b2540
feat: adds option to cap core count for temp aware run 2023-04-13 20:16:33 -05:00
9 changed files with 739 additions and 127 deletions

View File

@ -2,6 +2,50 @@
All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
### [0.7.2](https://gitea.deepak.science:2222/physics/deepdog/compare/0.7.1...0.7.2) (2023-07-24)
### Features
* clamps results now ([9bb8fc5](https://gitea.deepak.science:2222/physics/deepdog/commit/9bb8fc50fe1bd1a285a333c5a396bfb6ac3176cf))
### Bug Fixes
* fixes clamping format etc. ([a170a3c](https://gitea.deepak.science:2222/physics/deepdog/commit/a170a3ce01adcec356e5aaab9abcc0ec4accd64b))
### [0.7.1](https://gitea.deepak.science:2222/physics/deepdog/compare/0.7.0...0.7.1) (2023-07-24)
### Features
* adds subset simulation stuff ([33cab9a](https://gitea.deepak.science:2222/physics/deepdog/commit/33cab9ab4179cec13ae9e591a8ffc32df4dda989))
## [0.7.0](https://gitea.deepak.science:2222/physics/deepdog/compare/0.6.7...0.7.0) (2023-05-01)
### ⚠ BREAKING CHANGES
* removes fastfilter parameter because it should never be needed
### Features
* adds pair capability to real spectrum run hopefully ([a089951](https://gitea.deepak.science:2222/physics/deepdog/commit/a089951bbefcd8a0b2efeb49b7a8090412cbb23d))
* removes fastfilter parameter because it should never be needed ([a015daf](https://gitea.deepak.science:2222/physics/deepdog/commit/a015daf5ff6fa5f6155c8d7e02981b588840a5b0))
### [0.6.7](https://gitea.deepak.science:2222/physics/deepdog/compare/0.6.6...0.6.7) (2023-04-14)
### Features
* adds option to cap core count for real spectrum run ([bf15f4a](https://gitea.deepak.science:2222/physics/deepdog/commit/bf15f4a7b7f59504983624e7d512ed7474372032))
* adds option to cap core count for temp aware run ([12903b2](https://gitea.deepak.science:2222/physics/deepdog/commit/12903b2540cefb040174d230bc0d04719a6dc1b7))
### Bug Fixes
* avoids redefinition of core count in loop ([1cf4454](https://gitea.deepak.science:2222/physics/deepdog/commit/1cf44541531541088198bd4599d467df3e1acbcf))
### [0.6.6](https://gitea.deepak.science:2222/physics/deepdog/compare/0.6.5...0.6.6) (2023-04-09)

View File

@ -4,6 +4,7 @@ from deepdog.bayes_run import BayesRun
from deepdog.bayes_run_simulpairs import BayesRunSimulPairs
from deepdog.real_spectrum_run import RealSpectrumRun
from deepdog.temp_aware_real_spectrum_run import TempAwareRealSpectrumRun
from deepdog.bayes_run_with_ss import BayesRunWithSubspaceSimulation
def get_version():
@ -16,6 +17,7 @@ __all__ = [
"BayesRunSimulPairs",
"RealSpectrumRun",
"TempAwareRealSpectrumRun",
"BayesRunWithSubspaceSimulation",
]

View File

@ -0,0 +1,232 @@
import deepdog.subset_simulation
import pdme.inputs
import pdme.model
import pdme.measurement.input_types
import pdme.measurement.oscillating_dipole
import pdme.util.fast_v_calc
import pdme.util.fast_nonlocal_spectrum
from typing import Sequence, Tuple, List, Optional
import datetime
import csv
import logging
import numpy
import numpy.typing
# Default value for the `chunksize` constructor argument of the run class below.
# TODO: remove hardcode
CHUNKSIZE = 50
# Type alias for a single dot input.
# NOTE(review): presumably a (position, frequency) pair mirroring pdme's own alias — confirm.
# TODO: It's garbage to have this here duplicated from pdme.
DotInput = Tuple[numpy.typing.ArrayLike, float]
# When a subset simulation reports no over-target likelihood, the probability
# stored in the last entry of its probs_list is divided by this factor and
# used as the model's likelihood instead.
CLAMPING_FACTOR = 10
# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)
class BayesRunWithSubspaceSimulation:
    """
    A Bayes run in which every candidate model is scored by a subset simulation.

    Each iteration of ``go`` draws dipoles from ``actual_model``, computes the
    dot measurements for them, runs one
    ``deepdog.subset_simulation.SubsetSimulation`` per entry of
    ``models_with_names``, and uses the resulting likelihoods to update the
    running per-model probabilities. One CSV row per iteration is appended to
    ``self.filename``.

    Parameters
    ----------
    dot_positions : Sequence[numpy.typing.ArrayLike]
        Dot positions; combined with ``frequency_range`` through
        ``pdme.inputs.inputs_with_frequency_range`` to build the dot inputs.
    frequency_range : Sequence[float]
        Frequencies combined with ``dot_positions`` to build the dot inputs.
    models_with_names : Sequence[Tuple[str, pdme.model.DipoleModel]]
        The (name, model) pairs to evaluate.
    actual_model : pdme.model.DipoleModel
        The model the "actual" dipoles are generated from. If it has an ``n``
        attribute, that many dipole column groups are written; otherwise 1.
    filename_slug : str
        The filename slug to include in the output file name.
    max_frequency : float
        Passed to ``actual_model.get_dipoles`` when generating dipoles.
    end_threshold : Optional[float]
        If given, must lie strictly between 0 and 1; the run stops early once
        the largest model probability exceeds it. If None (the default), the
        run never stops early.
    run_count : int
        The number of runs to do.
    chunksize : int
        Stored on the instance; not otherwise used by this class.
    ss_n_c, ss_n_s, ss_m_max, ss_target_cost, ss_level_0_seed, ss_mcmc_seed,
    ss_use_adaptive_steps, ss_default_phi_step, ss_default_theta_step,
    ss_default_r_step, ss_default_w_log_step, ss_default_upper_w_log_step
        Forwarded unchanged, in this order, to ``SubsetSimulation``.

    Raises
    ------
    ValueError
        If ``end_threshold`` is given but is not strictly between 0 and 1.
    """

    def __init__(
        self,
        dot_positions: Sequence[numpy.typing.ArrayLike],
        frequency_range: Sequence[float],
        models_with_names: Sequence[Tuple[str, pdme.model.DipoleModel]],
        actual_model: pdme.model.DipoleModel,
        filename_slug: str,
        max_frequency: float = 20,
        end_threshold: Optional[float] = None,
        run_count=100,
        chunksize: int = CHUNKSIZE,
        ss_n_c: int = 500,
        ss_n_s: int = 100,
        ss_m_max: int = 15,
        ss_target_cost: Optional[float] = None,
        ss_level_0_seed: int = 200,
        ss_mcmc_seed: int = 20,
        ss_use_adaptive_steps=True,
        ss_default_phi_step=0.01,
        ss_default_theta_step=0.01,
        ss_default_r_step=0.01,
        ss_default_w_log_step=0.01,
        ss_default_upper_w_log_step=4,
    ) -> None:
        self.dot_inputs = pdme.inputs.inputs_with_frequency_range(
            dot_positions, frequency_range
        )
        self.dot_inputs_array = pdme.measurement.input_types.dot_inputs_to_array(
            self.dot_inputs
        )

        self.models_with_names = models_with_names
        self.models = [model for (_, model) in models_with_names]
        self.model_names = [name for (name, _) in models_with_names]
        self.actual_model = actual_model

        # Models without an `n` attribute are treated as single-dipole models.
        self.n: int = getattr(self.actual_model, "n", 1)

        self.model_count = len(self.models)

        # CSV layout: one (moment, location, frequency) group per dipole,
        # followed by a (likelihood, prob) pair per model.
        self.csv_fields = []
        for i in range(self.n):
            self.csv_fields.extend(
                [
                    f"dipole_moment_{i+1}",
                    f"dipole_location_{i+1}",
                    f"dipole_frequency_{i+1}",
                ]
            )
        self.compensate_zeros = True
        self.chunksize = chunksize
        for name in self.model_names:
            self.csv_fields.extend([f"{name}_likelihood", f"{name}_prob"])

        # Start from a uniform prior over the models.
        self.probabilities = [1 / self.model_count] * self.model_count

        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        self.filename = f"{timestamp}-{filename_slug}.bayesrunwithss.csv"
        self.max_frequency = max_frequency

        # Always define both attributes so that go() can read them even when
        # no threshold was requested (previously this raised AttributeError).
        self.end_threshold: Optional[float] = None
        self.use_end_threshold = False
        if end_threshold is not None:
            if not 0 < end_threshold < 1:
                raise ValueError(
                    f"end_threshold should be between 0 and 1, but is actually {end_threshold}"
                )
            self.end_threshold = end_threshold
            self.use_end_threshold = True
            _logger.info(f"Will abort early, at {self.end_threshold}.")

        self.ss_n_c = ss_n_c
        self.ss_n_s = ss_n_s
        self.ss_m_max = ss_m_max
        self.ss_target_cost = ss_target_cost
        self.ss_level_0_seed = ss_level_0_seed
        self.ss_mcmc_seed = ss_mcmc_seed
        self.ss_use_adaptive_steps = ss_use_adaptive_steps
        self.ss_default_phi_step = ss_default_phi_step
        self.ss_default_theta_step = ss_default_theta_step
        self.ss_default_r_step = ss_default_r_step
        self.ss_default_w_log_step = ss_default_w_log_step
        self.ss_default_upper_w_log_step = ss_default_upper_w_log_step

        self.run_count = run_count

    def go(self) -> None:
        """
        Execute the run, appending a header and then one row per iteration to
        ``self.filename``.

        Stops after ``run_count`` iterations, or earlier if an end threshold
        was configured and the largest model probability exceeds it.
        """
        with open(self.filename, "a", newline="") as outfile:
            writer = csv.DictWriter(outfile, fieldnames=self.csv_fields, dialect="unix")
            writer.writeheader()

        for _ in range(self.run_count):

            # Generate the actual dipoles
            actual_dipoles = self.actual_model.get_dipoles(self.max_frequency)

            measurements = actual_dipoles.get_dot_measurements(self.dot_inputs)

            _logger.info(f"Going to work on dipole at {actual_dipoles.dipoles}")

            results = []
            _logger.debug("Going to iterate over models now")
            for model_count, model_name_pair in enumerate(self.models_with_names):
                _logger.debug(f"Doing model #{model_count}, {model_name_pair[0]}")
                # SubsetSimulation takes the whole (name, model) pair.
                subset_run = deepdog.subset_simulation.SubsetSimulation(
                    model_name_pair,
                    self.dot_inputs,
                    measurements,
                    self.ss_n_c,
                    self.ss_n_s,
                    self.ss_m_max,
                    self.ss_target_cost,
                    self.ss_level_0_seed,
                    self.ss_mcmc_seed,
                    self.ss_use_adaptive_steps,
                    self.ss_default_phi_step,
                    self.ss_default_theta_step,
                    self.ss_default_r_step,
                    self.ss_default_w_log_step,
                    self.ss_default_upper_w_log_step,
                )
                results.append(subset_run.execute())

            _logger.debug("Done, constructing output now")
            row = {
                "dipole_moment_1": actual_dipoles.dipoles[0].p,
                "dipole_location_1": actual_dipoles.dipoles[0].s,
                "dipole_frequency_1": actual_dipoles.dipoles[0].w,
            }
            # The generated collection may hold fewer than self.n dipoles; stop
            # filling columns at the first missing one.
            for i in range(1, self.n):
                try:
                    current_dipoles = actual_dipoles.dipoles[i]
                    row[f"dipole_moment_{i+1}"] = current_dipoles.p
                    row[f"dipole_location_{i+1}"] = current_dipoles.s
                    row[f"dipole_frequency_{i+1}"] = current_dipoles.w
                except IndexError:
                    _logger.info(f"Not writing anymore, saw end after {i}")
                    break

            likelihoods: List[float] = []

            for name, result in zip(self.model_names, results):
                if result.over_target_likelihood is None:
                    # No over-target likelihood was reported: fall back to the
                    # probability in the last probs_list entry, scaled down.
                    clamped_likelihood = result.probs_list[-1][0] / CLAMPING_FACTOR
                    _logger.warning(f"got a none result, clamping to {clamped_likelihood}")
                else:
                    clamped_likelihood = result.over_target_likelihood
                likelihoods.append(clamped_likelihood)
                row[f"{name}_likelihood"] = clamped_likelihood

            # Bayes update: posterior is proportional to likelihood * prior,
            # normalised by the total weight.
            success_weight = sum(
                likelihood * prob
                for likelihood, prob in zip(likelihoods, self.probabilities)
            )
            self.probabilities = [
                likelihood * old_prob / success_weight
                for likelihood, old_prob in zip(likelihoods, self.probabilities)
            ]
            for name, probability in zip(self.model_names, self.probabilities):
                row[f"{name}_prob"] = probability
            _logger.info(row)

            # The file is reopened for every row, so each completed iteration
            # is written out and the file closed before the next one starts.
            with open(self.filename, "a", newline="") as outfile:
                writer = csv.DictWriter(
                    outfile, fieldnames=self.csv_fields, dialect="unix"
                )
                writer.writerow(row)

            if self.use_end_threshold and self.end_threshold is not None:
                max_prob = max(self.probabilities)
                if max_prob > self.end_threshold:
                    _logger.info(
                        f"Aborting early, because {max_prob} is greater than {self.end_threshold}"
                    )
                    break

View File

@ -5,7 +5,7 @@ import pdme.measurement.input_types
import pdme.measurement.oscillating_dipole
import pdme.util.fast_v_calc
import pdme.util.fast_nonlocal_spectrum
from typing import Sequence, Tuple, List, Dict, Union
from typing import Sequence, Tuple, List, Dict, Union, Optional
import datetime
import csv
import multiprocessing
@ -20,16 +20,50 @@ CHUNKSIZE = 50
_logger = logging.getLogger(__name__)
def get_a_result(input) -> int:
model, dot_inputs, lows, highs, monte_carlo_count, seed = input
def get_a_result_fast_filter_pairs(input) -> int:
(
model,
dot_inputs,
lows,
highs,
pair_inputs,
pair_lows,
pair_highs,
monte_carlo_count,
seed,
) = input
rng = numpy.random.default_rng(seed)
# TODO: A long term refactor is to pull the frequency stuff out from here. The None stands for max_frequency, which is unneeded in the actually useful models.
sample_dipoles = model.get_monte_carlo_dipole_inputs(
monte_carlo_count, None, rng_to_use=rng
)
vals = pdme.util.fast_v_calc.fast_vs_for_dipoleses(dot_inputs, sample_dipoles)
return numpy.count_nonzero(pdme.util.fast_v_calc.between(vals, lows, highs))
current_sample = sample_dipoles
for di, low, high in zip(dot_inputs, lows, highs):
if len(current_sample) < 1:
break
vals = pdme.util.fast_v_calc.fast_vs_for_dipoleses(
numpy.array([di]), current_sample
)
current_sample = current_sample[numpy.all((vals > low) & (vals < high), axis=1)]
for pi, plow, phigh in zip(pair_inputs, pair_lows, pair_highs):
if len(current_sample) < 1:
break
vals = pdme.util.fast_nonlocal_spectrum.fast_s_nonlocal_dipoleses(
numpy.array([pi]), current_sample
)
current_sample = current_sample[
numpy.all(
((vals > plow) & (vals < phigh)) | ((vals < plow) & (vals > phigh)),
axis=1,
)
]
return len(current_sample)
def get_a_result_fast_filter(input) -> int:
@ -87,7 +121,10 @@ class RealSpectrumRun:
max_monte_carlo_cycles_steps: int = 10,
chunksize: int = CHUNKSIZE,
initial_seed: int = 12345,
use_fast_filter: bool = True,
cap_core_count: int = 0,
pair_measurements: Optional[
Sequence[pdme.measurement.DotPairRangeMeasurement]
] = None,
) -> None:
self.measurements = measurements
self.dot_inputs = [(measure.r, measure.f) for measure in self.measurements]
@ -96,6 +133,21 @@ class RealSpectrumRun:
self.dot_inputs
)
if pair_measurements is not None:
self.pair_measurements = pair_measurements
self.use_pair_measurements = True
self.dot_pair_inputs = [
(measure.r1, measure.r2, measure.f)
for measure in self.pair_measurements
]
self.dot_pair_inputs_array = (
pdme.measurement.input_types.dot_pair_inputs_to_array(
self.dot_pair_inputs
)
)
else:
self.use_pair_measurements = False
self.models = [model for (_, model) in models_with_names]
self.model_names = [name for (name, _) in models_with_names]
self.model_count = len(self.models)
@ -116,13 +168,14 @@ class RealSpectrumRun:
self.probabilities = [1 / self.model_count] * self.model_count
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
self.use_fast_filter = use_fast_filter
ff_string = "no_fast_filter"
if self.use_fast_filter:
ff_string = "fast_filter"
ff_string = "fast_filter"
self.filename = f"{timestamp}-{filename_slug}.realdata.{ff_string}.bayesrun.csv"
self.initial_seed = initial_seed
self.cap_core_count = cap_core_count
def go(self) -> None:
with open(self.filename, "a", newline="") as outfile:
writer = csv.DictWriter(outfile, fieldnames=self.csv_fields, dialect="unix")
@ -135,16 +188,29 @@ class RealSpectrumRun:
self.measurements
)
pair_lows = None
pair_highs = None
if self.use_pair_measurements:
(
pair_lows,
pair_highs,
) = pdme.measurement.input_types.dot_range_measurements_low_high_arrays(
self.pair_measurements
)
# define a new seed sequence for each run
seed_sequence = numpy.random.SeedSequence(self.initial_seed)
results = []
_logger.debug("Going to iterate over models now")
core_count = multiprocessing.cpu_count() - 1 or 1
if (self.cap_core_count >= 1) and (self.cap_core_count < core_count):
core_count = self.cap_core_count
_logger.info(f"Using {core_count} cores")
for model_count, (model, model_name) in enumerate(
zip(self.models, self.model_names)
):
_logger.debug(f"Doing model #{model_count}: {model_name}")
core_count = multiprocessing.cpu_count() - 1 or 1
with multiprocessing.Pool(core_count) as pool:
cycle_count = 0
cycle_success = 0
@ -162,27 +228,46 @@ class RealSpectrumRun:
# that way we get more stuff.
seeds = seed_sequence.spawn(self.monte_carlo_cycles)
if self.use_fast_filter:
result_func = get_a_result_fast_filter
else:
result_func = get_a_result
current_success = sum(
pool.imap_unordered(
result_func,
[
(
model,
self.dot_inputs_array,
lows,
highs,
self.monte_carlo_count,
seed,
)
for seed in seeds
],
self.chunksize,
if self.use_pair_measurements:
current_success = sum(
pool.imap_unordered(
get_a_result_fast_filter_pairs,
[
(
model,
self.dot_inputs_array,
lows,
highs,
self.dot_pair_inputs_array,
pair_lows,
pair_highs,
self.monte_carlo_count,
seed,
)
for seed in seeds
],
self.chunksize,
)
)
else:
current_success = sum(
pool.imap_unordered(
get_a_result_fast_filter,
[
(
model,
self.dot_inputs_array,
lows,
highs,
self.monte_carlo_count,
seed,
)
for seed in seeds
],
self.chunksize,
)
)
)
cycle_success += current_success
_logger.debug(f"current running successes: {cycle_success}")

View File

@ -0,0 +1,3 @@
from deepdog.subset_simulation.subset_simulation_impl import SubsetSimulation
__all__ = ["SubsetSimulation"]

View File

@ -0,0 +1,309 @@
import logging
import numpy
import pdme.measurement
import pdme.measurement.input_types
import pdme.subspace_simulation
from typing import Sequence, Tuple, Optional
from dataclasses import dataclass
_logger = logging.getLogger(__name__)
@dataclass
class SubsetSimulationResult:
    """Outcome of a single SubsetSimulation.execute() call."""

    # Tuples of (probability, cost, level) collected over the simulation levels.
    probs_list: Sequence[Tuple]
    # The four fields below are only populated when execute() stops early
    # because the threshold cost fell below the target cost; otherwise they
    # are all None.
    # Cost and probability of the entry just before the target-cost insertion
    # index in the descending-cost list.
    over_target_cost: Optional[float]
    over_target_likelihood: Optional[float]
    # Cost and probability of the entry at the target-cost insertion index,
    # i.e. the first entry whose cost is below the target.
    under_target_cost: Optional[float]
    under_target_likelihood: Optional[float]
class SubsetSimulation:
    """
    Multi-level subset simulation of one dipole model against a set of
    actual dot measurements.

    execute() draws n_c * n_s level-0 samples from the model, then for up to
    m_max levels keeps the n_c lowest-cost samples as seeds, grows an MCMC
    chain from each seed via the model's get_mcmc_chain, and records a
    (probability, cost, level) tuple for samples along the way. If a
    target_cost is set, the simulation returns early once the level
    threshold cost drops below it.
    """

    def __init__(
        self,
        model_name_pair,
        dot_inputs,
        actual_measurements: Sequence[pdme.measurement.DotMeasurement],
        n_c: int,
        n_s: int,
        m_max: int,
        target_cost: Optional[float] = None,
        level_0_seed: int = 200,
        mcmc_seed: int = 20,
        use_adaptive_steps=True,
        default_phi_step=0.01,
        default_theta_step=0.01,
        default_r_step=0.01,
        default_w_log_step=0.01,
        default_upper_w_log_step=4,
    ):
        """
        Store the configuration and build the cost function.

        Parameters
        ----------
        model_name_pair
            A (name, model) pair; unpacked into self.model_name / self.model.
        dot_inputs
            Dot inputs, converted to an array with
            pdme.measurement.input_types.dot_inputs_to_array.
        actual_measurements
            Measurements whose `.v` values form the target measurement array.
        n_c
            Number of seeds carried from one level to the next.
        n_s
            Passed to get_mcmc_chain for each seed (presumably the chain
            length — confirm against pdme); n_c * n_s samples are drawn at
            level 0.
        m_max
            Maximum number of levels iterated in execute().
        target_cost
            Optional cost below which execute() returns early.
        level_0_seed, mcmc_seed
            Seeds for the level-0 sampling rng and the MCMC rng.
        use_adaptive_steps
            If true, step sizes are derived from the spread of the current
            seeds; otherwise the default_* steps are used directly.
        default_phi_step, default_theta_step, default_r_step,
        default_w_log_step, default_upper_w_log_step
            Default step sizes; see get_stdevs_from_arrays for how they are
            used in each mode.
        """
        name, model = model_name_pair
        self.model_name = name
        self.model = model
        _logger.info(f"got model {self.model_name}")

        self.dot_inputs_array = pdme.measurement.input_types.dot_inputs_to_array(
            dot_inputs
        )
        # _logger.debug(f"actual measurements: {actual_measurements}")
        self.actual_measurement_array = numpy.array([m.v for m in actual_measurements])

        # Closure binding the dot inputs and measurements, so callers (and
        # the model's MCMC routine) only need to pass the dipoles to score.
        def cost_function_to_use(dipoles_to_test):
            return pdme.subspace_simulation.proportional_costs_vs_actual_measurement(
                self.dot_inputs_array, self.actual_measurement_array, dipoles_to_test
            )

        self.cost_function_to_use = cost_function_to_use

        self.n_c = n_c
        self.n_s = n_s
        self.m_max = m_max

        self.level_0_seed = level_0_seed
        self.mcmc_seed = mcmc_seed

        self.use_adaptive_steps = use_adaptive_steps
        self.default_phi_step = default_phi_step
        self.default_theta_step = default_theta_step
        self.default_r_step = default_r_step
        self.default_w_log_step = default_w_log_step
        self.default_upper_w_log_step = default_upper_w_log_step

        _logger.info("using params:")
        _logger.info(f"\tn_c: {self.n_c}")
        _logger.info(f"\tn_s: {self.n_s}")
        _logger.info(f"\tm: {self.m_max}")
        _logger.info("let's do level 0...")

        self.target_cost = target_cost
        _logger.info(f"will stop at target cost {target_cost}")

    def execute(self) -> SubsetSimulationResult:
        """
        Run the simulation and return the collected probabilities.

        Returns a SubsetSimulationResult. Its over/under target fields are
        filled only on the early-exit path (target_cost set and reached);
        on the full m_max-level path they are None and probs_list is sorted
        by descending probability.
        """
        probs_list = []

        # Level 0: draw n_c * n_s samples directly from the model.
        # NOTE(review): the -1 is passed where other callers in this project
        # pass a max_frequency; presumably a placeholder — confirm.
        sample_dipoles = self.model.get_monte_carlo_dipole_inputs(
            self.n_c * self.n_s,
            -1,
            rng_to_use=numpy.random.default_rng(self.level_0_seed),
        )
        # _logger.debug(sample_dipoles)
        # _logger.debug(sample_dipoles.shape)
        costs = self.cost_function_to_use(sample_dipoles)

        _logger.debug(f"costs: {costs}")
        # Sort by descending cost, so the lowest-cost samples sit at the end.
        sorted_indexes = costs.argsort()[::-1]

        _logger.debug(costs[sorted_indexes])
        _logger.debug(sample_dipoles[sorted_indexes])

        sorted_costs = costs[sorted_indexes]
        sorted_dipoles = sample_dipoles[sorted_indexes]

        # The n_c-th lowest cost is the threshold for the first MCMC level.
        threshold_cost = sorted_costs[-self.n_c]

        all_dipoles = numpy.array(
            [
                pdme.subspace_simulation.sort_array_of_dipoles_by_frequency(samp)
                for samp in sorted_dipoles
            ]
        )
        all_chains = list(zip(sorted_costs, all_dipoles))

        # A single rng is shared by every chain across all levels.
        mcmc_rng = numpy.random.default_rng(self.mcmc_seed)

        for i in range(self.m_max):
            # The n_c lowest-cost entries seed the next level's chains.
            next_seeds = all_chains[-self.n_c:]

            # Record every non-seed entry of this level: its rank-based
            # fraction within the level, divided by n_s ** i.
            for cost_index, cost_chain in enumerate(all_chains[: -self.n_c]):
                probs_list.append(
                    (
                        ((self.n_c * self.n_s - cost_index) / (self.n_c * self.n_s))
                        / (self.n_s ** (i)),
                        cost_chain[0],
                        i + 1,
                    )
                )

            next_seeds_as_array = numpy.array([s for _, s in next_seeds])

            stdevs = self.get_stdevs_from_arrays(next_seeds_as_array)
            _logger.info(f"got stdevs: {stdevs.stdevs}")

            all_chains = []
            for c, s in next_seeds:
                # chain = mcmc(s, threshold_cost, n_s, model, dot_inputs_array, actual_measurement_array, mcmc_rng, curr_cost=c, stdevs=stdevs)
                # until new version gotta do
                chain = self.model.get_mcmc_chain(
                    s,
                    self.cost_function_to_use,
                    self.n_s,
                    threshold_cost,
                    stdevs,
                    initial_cost=c,
                    rng_arg=mcmc_rng,
                )
                for cost, chained in chain:
                    # A chain cost may be indexable or a plain scalar; take
                    # element 0 when indexing works, else the value itself.
                    try:
                        filtered_cost = cost[0]
                    except IndexError:
                        filtered_cost = cost
                    all_chains.append((filtered_cost, chained))

            # _logger.debug(all_chains)

            all_chains.sort(key=lambda c: c[0], reverse=True)

            threshold_cost = all_chains[-self.n_c][0]
            _logger.info(
                f"current threshold cost: {threshold_cost}, at P = (1 / {self.n_s})^{i + 1}"
            )
            if (self.target_cost is not None) and (threshold_cost < self.target_cost):
                _logger.info(
                    f"got a threshold cost {threshold_cost}, less than {self.target_cost}. will leave early"
                )

                cost_list = [c[0] for c in all_chains]
                # Number of entries whose cost is >= target_cost.
                over_index = reverse_bisect_right(cost_list, self.target_cost)

                shorter_probs_list = []
                for cost_index, cost_chain in enumerate(all_chains):
                    probs_list.append(
                        (
                            ((self.n_c * self.n_s - cost_index) / (self.n_c * self.n_s))
                            / (self.n_s ** (i)),
                            cost_chain[0],
                            i + 1,
                        )
                    )
                    shorter_probs_list.append(
                        (
                            cost_chain[0],
                            ((self.n_c * self.n_s - cost_index) / (self.n_c * self.n_s))
                            / (self.n_s ** (i)),
                        )
                    )
                # _logger.info(shorter_probs_list)
                # NOTE(review): if every cost in this level is below
                # target_cost then over_index is 0 and `over_index - 1`
                # wraps around to the last (lowest-cost) entry — confirm
                # whether that case can occur and what it should report.
                result = SubsetSimulationResult(
                    probs_list=probs_list,
                    over_target_cost=shorter_probs_list[over_index - 1][0],
                    over_target_likelihood=shorter_probs_list[over_index - 1][1],
                    under_target_cost=shorter_probs_list[over_index][0],
                    under_target_likelihood=shorter_probs_list[over_index][1],
                )
                return result

            # _logger.debug([c[0] for c in all_chains[-n_c:]])
            _logger.info(f"doing level {i + 1}")

        # All m_max levels ran without an early exit: record the final level
        # in full (seeds included) and report no target crossing.
        for cost_index, cost_chain in enumerate(all_chains):
            probs_list.append(
                (
                    ((self.n_c * self.n_s - cost_index) / (self.n_c * self.n_s))
                    / (self.n_s ** (self.m_max)),
                    cost_chain[0],
                    self.m_max + 1,
                )
            )
        threshold_cost = all_chains[-self.n_c][0]
        _logger.info(
            f"final threshold cost: {threshold_cost}, at P = (1 / {self.n_s})^{self.m_max + 1}"
        )
        for a in all_chains[-10:]:
            _logger.info(a)
        # for prob, prob_cost in probs_list:
        # 	_logger.info(f"\t{prob}: {prob_cost}")
        # Descending probability, so the last entry has the smallest one.
        probs_list.sort(key=lambda c: c[0], reverse=True)
        result = SubsetSimulationResult(
            probs_list=probs_list,
            over_target_cost=None,
            over_target_likelihood=None,
            under_target_cost=None,
            under_target_likelihood=None,
        )
        return result

    def get_stdevs_from_arrays(
        self, array
    ) -> pdme.subspace_simulation.MCMCStandardDeviation:
        """
        Build the MCMC step sizes for the next level from the seed array.

        With adaptive steps, one DipoleStandardDeviation is produced per
        index along axis 1 of `array`, each step being the standard
        deviation of that quantity over the seeds, floored at the matching
        default step divided by (n_s * 10); the log-frequency step is also
        capped at default_upper_w_log_step. Without adaptive steps, a single
        DipoleStandardDeviation built from the default steps is used.
        """
        # stdevs = get_stdevs_from_arrays(next_seeds_as_array, model)
        if self.use_adaptive_steps:

            stdev_array = []
            # assumes array is (seed, dipole, component) — TODO confirm
            count = array.shape[1]
            for dipole_index in range(count):
                selected = array[:, dipole_index]
                # NOTE(review): columns 0-2 are read as the dipole moment
                # components, 3-5 as position and the last as frequency —
                # verify against pdme's dipole array layout.
                pxs = selected[:, 0]
                pys = selected[:, 1]
                pzs = selected[:, 2]
                # Spherical angles of the moment; relies on the model
                # exposing a `pfixed` attribute (presumably the fixed
                # moment magnitude — confirm).
                thetas = numpy.arccos(pzs / self.model.pfixed)
                phis = numpy.arctan2(pys, pxs)

                rstdevs = numpy.maximum(
                    numpy.std(selected, axis=0)[3:6],
                    self.default_r_step / (self.n_s * 10),
                )
                # Floor first, then cap at the upper log-step limit.
                frequency_stdevs = numpy.minimum(
                    numpy.maximum(
                        numpy.std(numpy.log(selected[:, -1])),
                        self.default_w_log_step / (self.n_s * 10),
                    ),
                    self.default_upper_w_log_step,
                )
                stdev_array.append(
                    pdme.subspace_simulation.DipoleStandardDeviation(
                        p_theta_step=max(
                            numpy.std(thetas), self.default_theta_step / (self.n_s * 10)
                        ),
                        p_phi_step=max(
                            numpy.std(phis), self.default_phi_step / (self.n_s * 10)
                        ),
                        rx_step=rstdevs[0],
                        ry_step=rstdevs[1],
                        rz_step=rstdevs[2],
                        w_log_step=frequency_stdevs,
                    )
                )
        else:
            # NOTE(review): positional order here is (phi, theta, r, r, r,
            # w_log); confirm it matches DipoleStandardDeviation's field order.
            default_stdev = pdme.subspace_simulation.DipoleStandardDeviation(
                self.default_phi_step,
                self.default_theta_step,
                self.default_r_step,
                self.default_r_step,
                self.default_r_step,
                self.default_w_log_step,
            )
            stdev_array = [default_stdev]
        stdevs = pdme.subspace_simulation.MCMCStandardDeviation(stdev_array)
        return stdevs
def reverse_bisect_right(a, x, lo=0, hi=None):
    """Locate the insertion point for x in a list sorted in descending order.

    The returned index i splits the searched slice so that every element of
    a[:i] is >= x and every element of a[i:] is < x. Inserting x at index i
    therefore places it just after any entries equal to x. Put differently,
    the result is the number of elements of a that are >= x.

    lo (default 0) and hi (default len(a)) restrict the search to a[lo:hi].
    A negative lo raises ValueError.

    >>> a = [8, 6, 5, 4, 2]
    >>> reverse_bisect_right(a, 5)
    3
    >>> a[:reverse_bisect_right(a, 5)]
    [8, 6, 5]
    """
    if lo < 0:
        raise ValueError("lo must be non-negative")

    left = lo
    right = len(a) if hi is None else hi

    # Standard binary search, with the comparison flipped for descending order.
    while left < right:
        middle = (left + right) // 2
        if not x > a[middle]:
            # a[middle] >= x: the insertion point lies strictly to the right.
            left = middle + 1
            continue
        right = middle
    return left

View File

@ -90,6 +90,7 @@ class TempAwareRealSpectrumRun:
max_monte_carlo_cycles_steps: int = 10,
chunksize: int = CHUNKSIZE,
initial_seed: int = 12345,
cap_core_count: int = 0,
) -> None:
self.measurements_dict = measurements_dict
self.dot_inputs_dict = {
@ -126,6 +127,8 @@ class TempAwareRealSpectrumRun:
self.filename = f"{timestamp}-{filename_slug}.realdata.{ff_string}.bayesrun.csv"
self.initial_seed = initial_seed
self.cap_core_count = cap_core_count
def go(self) -> None:
with open(self.filename, "a", newline="") as outfile:
writer = csv.DictWriter(outfile, fieldnames=self.csv_fields, dialect="unix")
@ -146,11 +149,14 @@ class TempAwareRealSpectrumRun:
results = []
_logger.debug("Going to iterate over models now")
core_count = multiprocessing.cpu_count() - 1 or 1
if (self.cap_core_count >= 1) and (self.cap_core_count < core_count):
core_count = self.cap_core_count
_logger.info(f"Using {core_count} cores")
for model_count, (model, model_name) in enumerate(
zip(self.models, self.model_names)
):
_logger.debug(f"Doing model #{model_count}: {model_name}")
core_count = multiprocessing.cpu_count() - 1 or 1
with multiprocessing.Pool(core_count) as pool:
cycle_count = 0
cycle_success = 0

113
poetry.lock generated
View File

@ -94,7 +94,7 @@ python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7
[[package]]
name = "coverage"
version = "7.2.3"
version = "7.2.7"
description = "Code coverage measurement for Python"
category = "dev"
optional = false
@ -360,11 +360,11 @@ python-versions = ">=3.7"
[[package]]
name = "pdme"
version = "0.8.8"
version = "0.9.1"
description = "Python dipole model evaluator"
category = "main"
optional = false
python-versions = ">=3.8,<3.10"
python-versions = ">=3.8.1,<3.10"
[package.dependencies]
numpy = ">=1.22.3,<2.0.0"
@ -729,8 +729,8 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "flake8 (<5)", "pytest-co
[metadata]
lock-version = "1.1"
python-versions = "^3.8,<3.10"
content-hash = "d32b74325a18dc187501980f37d128a2a07d7bb0e4ea2c5cb14cf14f8b7a0222"
python-versions = ">=3.8.1,<3.10"
content-hash = "0161af7edf18c16819f1ce083ab491c17c9809f2770219725131451b1a16a970"
[metadata.files]
black = []
@ -738,10 +738,7 @@ bleach = []
certifi = []
cffi = []
charset-normalizer = []
click = [
{file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"},
{file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"},
]
click = []
click-log = []
colorama = []
coverage = []
@ -749,10 +746,7 @@ cryptography = []
docutils = []
dotty-dict = []
exceptiongroup = []
flake8 = [
{file = "flake8-4.0.1-py2.py3-none-any.whl", hash = "sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d"},
{file = "flake8-4.0.1.tar.gz", hash = "sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d"},
]
flake8 = []
gitdb = []
gitpython = []
idna = []
@ -763,103 +757,40 @@ invoke = []
"jaraco.classes" = []
jeepney = []
keyring = []
mccabe = [
{file = "mccabe-0.6.1-py2.py3-none-any.whl", hash = "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42"},
{file = "mccabe-0.6.1.tar.gz", hash = "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"},
]
mccabe = []
more-itertools = []
mypy = []
mypy-extensions = []
numpy = [
{file = "numpy-1.22.3-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:92bfa69cfbdf7dfc3040978ad09a48091143cffb778ec3b03fa170c494118d75"},
{file = "numpy-1.22.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8251ed96f38b47b4295b1ae51631de7ffa8260b5b087808ef09a39a9d66c97ab"},
{file = "numpy-1.22.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48a3aecd3b997bf452a2dedb11f4e79bc5bfd21a1d4cc760e703c31d57c84b3e"},
{file = "numpy-1.22.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3bae1a2ed00e90b3ba5f7bd0a7c7999b55d609e0c54ceb2b076a25e345fa9f4"},
{file = "numpy-1.22.3-cp310-cp310-win32.whl", hash = "sha256:f950f8845b480cffe522913d35567e29dd381b0dc7e4ce6a4a9f9156417d2430"},
{file = "numpy-1.22.3-cp310-cp310-win_amd64.whl", hash = "sha256:08d9b008d0156c70dc392bb3ab3abb6e7a711383c3247b410b39962263576cd4"},
{file = "numpy-1.22.3-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:201b4d0552831f7250a08d3b38de0d989d6f6e4658b709a02a73c524ccc6ffce"},
{file = "numpy-1.22.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f8c1f39caad2c896bc0018f699882b345b2a63708008be29b1f355ebf6f933fe"},
{file = "numpy-1.22.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:568dfd16224abddafb1cbcce2ff14f522abe037268514dd7e42c6776a1c3f8e5"},
{file = "numpy-1.22.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ca688e1b9b95d80250bca34b11a05e389b1420d00e87a0d12dc45f131f704a1"},
{file = "numpy-1.22.3-cp38-cp38-win32.whl", hash = "sha256:e7927a589df200c5e23c57970bafbd0cd322459aa7b1ff73b7c2e84d6e3eae62"},
{file = "numpy-1.22.3-cp38-cp38-win_amd64.whl", hash = "sha256:07a8c89a04997625236c5ecb7afe35a02af3896c8aa01890a849913a2309c676"},
{file = "numpy-1.22.3-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:2c10a93606e0b4b95c9b04b77dc349b398fdfbda382d2a39ba5a822f669a0123"},
{file = "numpy-1.22.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fade0d4f4d292b6f39951b6836d7a3c7ef5b2347f3c420cd9820a1d90d794802"},
{file = "numpy-1.22.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bfb1bb598e8229c2d5d48db1860bcf4311337864ea3efdbe1171fb0c5da515d"},
{file = "numpy-1.22.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97098b95aa4e418529099c26558eeb8486e66bd1e53a6b606d684d0c3616b168"},
{file = "numpy-1.22.3-cp39-cp39-win32.whl", hash = "sha256:fdf3c08bce27132395d3c3ba1503cac12e17282358cb4bddc25cc46b0aca07aa"},
{file = "numpy-1.22.3-cp39-cp39-win_amd64.whl", hash = "sha256:639b54cdf6aa4f82fe37ebf70401bbb74b8508fddcf4797f9fe59615b8c5813a"},
{file = "numpy-1.22.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c34ea7e9d13a70bf2ab64a2532fe149a9aced424cd05a2c4ba662fd989e3e45f"},
{file = "numpy-1.22.3.zip", hash = "sha256:dbc7601a3b7472d559dc7b933b18b4b66f9aa7452c120e87dfb33d02008c8a18"},
]
numpy = []
packaging = []
pathspec = []
pdme = []
pkginfo = []
platformdirs = []
pluggy = [
{file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
{file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
]
pycodestyle = [
{file = "pycodestyle-2.8.0-py2.py3-none-any.whl", hash = "sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20"},
{file = "pycodestyle-2.8.0.tar.gz", hash = "sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f"},
]
pycparser = [
{file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"},
{file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"},
]
pyflakes = [
{file = "pyflakes-2.4.0-py2.py3-none-any.whl", hash = "sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e"},
{file = "pyflakes-2.4.0.tar.gz", hash = "sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c"},
]
pluggy = []
pycodestyle = []
pycparser = []
pyflakes = []
pygments = []
pytest = []
pytest-cov = [
{file = "pytest-cov-3.0.0.tar.gz", hash = "sha256:e7f0f5b1617d2210a2cabc266dfe2f4c75a8d32fb89eafb7ad9d06f6d076d470"},
{file = "pytest_cov-3.0.0-py3-none-any.whl", hash = "sha256:578d5d15ac4a25e5f961c938b85a05b09fdaae9deef3bb6de9a6e766622ca7a6"},
]
pytest-cov = []
python-gitlab = []
python-semantic-release = []
pywin32-ctypes = [
{file = "pywin32-ctypes-0.2.0.tar.gz", hash = "sha256:24ffc3b341d457d48e8922352130cf2644024a4ff09762a2261fd34c36ee5942"},
{file = "pywin32_ctypes-0.2.0-py2.py3-none-any.whl", hash = "sha256:9dc2d991b3479cc2df15930958b674a48a227d5361d413827a4cfd0b5876fc98"},
]
pywin32-ctypes = []
readme-renderer = []
requests = []
requests-toolbelt = []
rfc3986 = [
{file = "rfc3986-2.0.0-py2.py3-none-any.whl", hash = "sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd"},
{file = "rfc3986-2.0.0.tar.gz", hash = "sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c"},
]
rfc3986 = []
scipy = []
secretstorage = []
semver = [
{file = "semver-2.13.0-py2.py3-none-any.whl", hash = "sha256:ced8b23dceb22134307c1b8abfa523da14198793d9787ac838e70e29e77458d4"},
{file = "semver-2.13.0.tar.gz", hash = "sha256:fa0fe2722ee1c3f57eac478820c3a5ae2f624af8264cbdf9000c980ff7f75e3f"},
]
six = [
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
]
smmap = [
{file = "smmap-5.0.0-py3-none-any.whl", hash = "sha256:2aba19d6a040e78d8b09de5c57e96207b09ed71d8e55ce0959eeee6c8e190d94"},
{file = "smmap-5.0.0.tar.gz", hash = "sha256:c840e62059cd3be204b0c9c9f74be2c09d5648eddd4580d9314c3ecde0b30936"},
]
tomli = [
{file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
]
semver = []
six = []
smmap = []
tomli = []
tomlkit = []
tqdm = []
twine = [
{file = "twine-3.8.0-py3-none-any.whl", hash = "sha256:d0550fca9dc19f3d5e8eadfce0c227294df0a2a951251a4385797c8a6198b7c8"},
{file = "twine-3.8.0.tar.gz", hash = "sha256:8efa52658e0ae770686a13b675569328f1fba9837e5de1867bfe5f46a9aefe19"},
]
twine = []
typing-extensions = []
urllib3 = []
webencodings = [
{file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"},
{file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"},
]
webencodings = []
zipp = []

View File

@ -1,19 +1,19 @@
[tool.poetry]
name = "deepdog"
version = "0.6.6"
version = "0.7.2"
description = ""
authors = ["Deepak Mallubhotla <dmallubhotla+github@gmail.com>"]
[tool.poetry.dependencies]
python = "^3.8,<3.10"
pdme = "^0.8.6"
python = ">=3.8.1,<3.10"
pdme = "^0.9.1"
numpy = "1.22.3"
scipy = "1.10"
[tool.poetry.dev-dependencies]
pytest = ">=6"
flake8 = "^4.0.1"
pytest-cov = "^3.0.0"
pytest-cov = "^4.0.0"
mypy = "^0.971"
python-semantic-release = "^7.24.0"
black = "^22.3.0"