From ad521ba472066d4ada23092a747a46e31a9e306e Mon Sep 17 00:00:00 2001 From: Deepak Mallubhotla Date: Sun, 23 Jul 2023 18:45:53 -0500 Subject: [PATCH 1/4] deps: upgrades pdme version to use mcmc code --- poetry.lock | 113 ++++++++++--------------------------------------- pyproject.toml | 4 +- 2 files changed, 24 insertions(+), 93 deletions(-) diff --git a/poetry.lock b/poetry.lock index 55cf0c6..42ad5ec 100644 --- a/poetry.lock +++ b/poetry.lock @@ -94,7 +94,7 @@ python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7 [[package]] name = "coverage" -version = "7.2.3" +version = "7.2.7" description = "Code coverage measurement for Python" category = "dev" optional = false @@ -360,11 +360,11 @@ python-versions = ">=3.7" [[package]] name = "pdme" -version = "0.8.8" +version = "0.8.9" description = "Python dipole model evaluator" category = "main" optional = false -python-versions = ">=3.8,<3.10" +python-versions = ">=3.8.1,<3.10" [package.dependencies] numpy = ">=1.22.3,<2.0.0" @@ -729,8 +729,8 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "flake8 (<5)", "pytest-co [metadata] lock-version = "1.1" -python-versions = "^3.8,<3.10" -content-hash = "d32b74325a18dc187501980f37d128a2a07d7bb0e4ea2c5cb14cf14f8b7a0222" +python-versions = ">=3.8.1,<3.10" +content-hash = "7a708c21744e80b6b5e94f7ef2d5b58f9539179d2c0dd1d9f17b3daa4151d52f" [metadata.files] black = [] @@ -738,10 +738,7 @@ bleach = [] certifi = [] cffi = [] charset-normalizer = [] -click = [ - {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, - {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, -] +click = [] click-log = [] colorama = [] coverage = [] @@ -749,10 +746,7 @@ cryptography = [] docutils = [] dotty-dict = [] exceptiongroup = [] -flake8 = [ - {file = "flake8-4.0.1-py2.py3-none-any.whl", hash = "sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d"}, - {file = "flake8-4.0.1.tar.gz", hash = "sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d"}, -] +flake8 = [] gitdb = [] gitpython = [] idna = [] @@ -763,103 +757,40 @@ invoke = [] "jaraco.classes" = [] jeepney = [] keyring = [] -mccabe = [ - {file = "mccabe-0.6.1-py2.py3-none-any.whl", hash = "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42"}, - {file = "mccabe-0.6.1.tar.gz", hash = "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"}, -] +mccabe = [] more-itertools = [] mypy = [] mypy-extensions = [] -numpy = [ - {file = "numpy-1.22.3-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:92bfa69cfbdf7dfc3040978ad09a48091143cffb778ec3b03fa170c494118d75"}, - {file = "numpy-1.22.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8251ed96f38b47b4295b1ae51631de7ffa8260b5b087808ef09a39a9d66c97ab"}, - {file = "numpy-1.22.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48a3aecd3b997bf452a2dedb11f4e79bc5bfd21a1d4cc760e703c31d57c84b3e"}, - {file = "numpy-1.22.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3bae1a2ed00e90b3ba5f7bd0a7c7999b55d609e0c54ceb2b076a25e345fa9f4"}, - {file = "numpy-1.22.3-cp310-cp310-win32.whl", hash = "sha256:f950f8845b480cffe522913d35567e29dd381b0dc7e4ce6a4a9f9156417d2430"}, - {file = "numpy-1.22.3-cp310-cp310-win_amd64.whl", hash = "sha256:08d9b008d0156c70dc392bb3ab3abb6e7a711383c3247b410b39962263576cd4"}, - {file = "numpy-1.22.3-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:201b4d0552831f7250a08d3b38de0d989d6f6e4658b709a02a73c524ccc6ffce"}, - {file = "numpy-1.22.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f8c1f39caad2c896bc0018f699882b345b2a63708008be29b1f355ebf6f933fe"}, - {file = "numpy-1.22.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:568dfd16224abddafb1cbcce2ff14f522abe037268514dd7e42c6776a1c3f8e5"}, - {file = "numpy-1.22.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ca688e1b9b95d80250bca34b11a05e389b1420d00e87a0d12dc45f131f704a1"}, - {file = "numpy-1.22.3-cp38-cp38-win32.whl", hash = "sha256:e7927a589df200c5e23c57970bafbd0cd322459aa7b1ff73b7c2e84d6e3eae62"}, - {file = "numpy-1.22.3-cp38-cp38-win_amd64.whl", hash = "sha256:07a8c89a04997625236c5ecb7afe35a02af3896c8aa01890a849913a2309c676"}, - {file = "numpy-1.22.3-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:2c10a93606e0b4b95c9b04b77dc349b398fdfbda382d2a39ba5a822f669a0123"}, - {file = "numpy-1.22.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fade0d4f4d292b6f39951b6836d7a3c7ef5b2347f3c420cd9820a1d90d794802"}, - {file = "numpy-1.22.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bfb1bb598e8229c2d5d48db1860bcf4311337864ea3efdbe1171fb0c5da515d"}, - {file = "numpy-1.22.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97098b95aa4e418529099c26558eeb8486e66bd1e53a6b606d684d0c3616b168"}, - {file = "numpy-1.22.3-cp39-cp39-win32.whl", hash = "sha256:fdf3c08bce27132395d3c3ba1503cac12e17282358cb4bddc25cc46b0aca07aa"}, - {file = "numpy-1.22.3-cp39-cp39-win_amd64.whl", hash = "sha256:639b54cdf6aa4f82fe37ebf70401bbb74b8508fddcf4797f9fe59615b8c5813a"}, - {file = "numpy-1.22.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c34ea7e9d13a70bf2ab64a2532fe149a9aced424cd05a2c4ba662fd989e3e45f"}, - {file = "numpy-1.22.3.zip", hash = "sha256:dbc7601a3b7472d559dc7b933b18b4b66f9aa7452c120e87dfb33d02008c8a18"}, -] +numpy = [] packaging = [] pathspec = [] pdme = [] pkginfo = [] platformdirs = [] -pluggy = [ - {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, - {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, -] -pycodestyle = [ - {file = "pycodestyle-2.8.0-py2.py3-none-any.whl", hash = "sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20"}, - {file = "pycodestyle-2.8.0.tar.gz", hash = "sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f"}, -] -pycparser = [ - {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, - {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, -] -pyflakes = [ - {file = "pyflakes-2.4.0-py2.py3-none-any.whl", hash = "sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e"}, - {file = "pyflakes-2.4.0.tar.gz", hash = "sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c"}, -] +pluggy = [] +pycodestyle = [] +pycparser = [] +pyflakes = [] pygments = [] pytest = [] -pytest-cov = [ - {file = "pytest-cov-3.0.0.tar.gz", hash = "sha256:e7f0f5b1617d2210a2cabc266dfe2f4c75a8d32fb89eafb7ad9d06f6d076d470"}, - {file = "pytest_cov-3.0.0-py3-none-any.whl", hash = "sha256:578d5d15ac4a25e5f961c938b85a05b09fdaae9deef3bb6de9a6e766622ca7a6"}, -] +pytest-cov = [] python-gitlab = [] python-semantic-release = [] -pywin32-ctypes = [ - {file = "pywin32-ctypes-0.2.0.tar.gz", hash = "sha256:24ffc3b341d457d48e8922352130cf2644024a4ff09762a2261fd34c36ee5942"}, - {file = "pywin32_ctypes-0.2.0-py2.py3-none-any.whl", hash = "sha256:9dc2d991b3479cc2df15930958b674a48a227d5361d413827a4cfd0b5876fc98"}, -] +pywin32-ctypes = [] readme-renderer = [] requests = [] requests-toolbelt = [] -rfc3986 = [ - {file = "rfc3986-2.0.0-py2.py3-none-any.whl", hash = "sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd"}, - {file = "rfc3986-2.0.0.tar.gz", hash = "sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c"}, -] +rfc3986 = [] scipy = [] secretstorage = [] -semver = [ - {file = "semver-2.13.0-py2.py3-none-any.whl", hash = "sha256:ced8b23dceb22134307c1b8abfa523da14198793d9787ac838e70e29e77458d4"}, - {file = "semver-2.13.0.tar.gz", hash = "sha256:fa0fe2722ee1c3f57eac478820c3a5ae2f624af8264cbdf9000c980ff7f75e3f"}, -] -six = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, -] -smmap = [ - {file = "smmap-5.0.0-py3-none-any.whl", hash = "sha256:2aba19d6a040e78d8b09de5c57e96207b09ed71d8e55ce0959eeee6c8e190d94"}, - {file = "smmap-5.0.0.tar.gz", hash = "sha256:c840e62059cd3be204b0c9c9f74be2c09d5648eddd4580d9314c3ecde0b30936"}, -] -tomli = [ - {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, - {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, -] +semver = [] +six = [] +smmap = [] +tomli = [] tomlkit = [] tqdm = [] -twine = [ - {file = "twine-3.8.0-py3-none-any.whl", hash = "sha256:d0550fca9dc19f3d5e8eadfce0c227294df0a2a951251a4385797c8a6198b7c8"}, - {file = "twine-3.8.0.tar.gz", hash = "sha256:8efa52658e0ae770686a13b675569328f1fba9837e5de1867bfe5f46a9aefe19"}, -] +twine = [] typing-extensions = [] urllib3 = [] -webencodings = [ - {file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"}, - {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"}, -] +webencodings = [] zipp = [] diff --git a/pyproject.toml b/pyproject.toml index 355f8f2..6f375fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,8 +5,8 @@ description = "" authors = ["Deepak Mallubhotla "] [tool.poetry.dependencies] -python = "^3.8,<3.10" -pdme = "^0.8.6" +python = ">=3.8.1,<3.10" +pdme = "^0.8.9" numpy = "1.22.3" scipy = "1.10" From 33cab9ab4179cec13ae9e591a8ffc32df4dda989 Mon Sep 17 00:00:00 2001 From: Deepak Mallubhotla Date: Mon, 24 Jul 2023 01:50:56 -0500 Subject: [PATCH 2/4] feat: adds subset simulation stuff --- deepdog/__init__.py | 3 +- deepdog/bayes_run_with_ss.py | 229 +++++++++++++ deepdog/subset_simulation/__init__.py | 3 + .../subset_simulation_impl.py | 310 ++++++++++++++++++ poetry.lock | 4 +- pyproject.toml | 2 +- 6 files changed, 547 insertions(+), 4 deletions(-) create mode 100644 deepdog/bayes_run_with_ss.py create mode 100644 deepdog/subset_simulation/__init__.py create mode 100644 deepdog/subset_simulation/subset_simulation_impl.py diff --git a/deepdog/__init__.py b/deepdog/__init__.py index 8dc5987..5db1f84 100644 --- a/deepdog/__init__.py +++ b/deepdog/__init__.py @@ -4,7 +4,7 @@ from deepdog.bayes_run import BayesRun from deepdog.bayes_run_simulpairs import BayesRunSimulPairs from deepdog.real_spectrum_run import RealSpectrumRun from deepdog.temp_aware_real_spectrum_run import TempAwareRealSpectrumRun - +from deepdog.bayes_run_with_ss import BayesRunWithSubspaceSimulation def get_version(): return __version__ @@ -16,6 +16,7 @@ __all__ = [ "BayesRunSimulPairs", "RealSpectrumRun", "TempAwareRealSpectrumRun", + "BayesRunWithSubspaceSimulation", ] diff --git a/deepdog/bayes_run_with_ss.py b/deepdog/bayes_run_with_ss.py new file mode 100644 index 0000000..ad73e00 --- /dev/null +++ b/deepdog/bayes_run_with_ss.py @@ -0,0 +1,229 @@ +import deepdog.subset_simulation +import pdme.inputs +import pdme.model +import pdme.measurement.input_types +import pdme.measurement.oscillating_dipole +import pdme.util.fast_v_calc +import pdme.util.fast_nonlocal_spectrum +from typing import Sequence, Tuple, List, Optional +import datetime +import csv +import multiprocessing +import logging +import numpy +import numpy.typing + + +# TODO: remove hardcode +CHUNKSIZE = 50 + +# TODO: It's garbage to have this here duplicated from pdme. +DotInput = Tuple[numpy.typing.ArrayLike, float] + + +_logger = logging.getLogger(__name__) + + +class BayesRunWithSubspaceSimulation: + """ + A single Bayes run for a given set of dots. + + Parameters + ---------- + dot_inputs : Sequence[DotInput] + The dot inputs for this bayes run. + + models_with_names : Sequence[Tuple(str, pdme.model.DipoleModel)] + The models to evaluate. + + actual_model : pdme.model.DipoleModel + The model which is actually correct. + + filename_slug : str + The filename slug to include. + + run_count: int + The number of runs to do. + """ + + def __init__( + self, + dot_positions: Sequence[numpy.typing.ArrayLike], + frequency_range: Sequence[float], + models_with_names: Sequence[Tuple[str, pdme.model.DipoleModel]], + actual_model: pdme.model.DipoleModel, + filename_slug: str, + max_frequency: float = 20, + end_threshold: float = None, + run_count = 100, + chunksize: int = CHUNKSIZE, + ss_n_c: int = 500, + ss_n_s: int = 100, + ss_m_max: int = 15, + ss_target_cost: Optional[float] = None, + ss_level_0_seed: int = 200, + ss_mcmc_seed: int = 20, + ss_use_adaptive_steps=True, + ss_default_phi_step=0.01, + ss_default_theta_step=0.01, + ss_default_r_step=0.01, + ss_default_w_log_step=0.01, + ss_default_upper_w_log_step=4, + ) -> None: + self.dot_inputs = pdme.inputs.inputs_with_frequency_range( + dot_positions, frequency_range + ) + self.dot_inputs_array = pdme.measurement.input_types.dot_inputs_to_array( + self.dot_inputs + ) + + self.models_with_names = models_with_names + self.models = [model for (_, model) in models_with_names] + self.model_names = [name for (name, _) in models_with_names] + self.actual_model = actual_model + + self.n: int + try: + self.n = self.actual_model.n # type: ignore + except AttributeError: + self.n = 1 + + self.model_count = len(self.models) + + self.csv_fields = [] + for i in range(self.n): + self.csv_fields.extend( + [ + f"dipole_moment_{i+1}", + f"dipole_location_{i+1}", + f"dipole_frequency_{i+1}", + ] + ) + self.compensate_zeros = True + self.chunksize = chunksize + for name in self.model_names: + self.csv_fields.extend([f"{name}_likelihood", f"{name}_prob"]) + + self.probabilities = [1 / self.model_count] * self.model_count + + timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + self.filename = f"{timestamp}-{filename_slug}.bayesrunwithss.csv" + self.max_frequency = max_frequency + + if end_threshold is not None: + if 0 < end_threshold < 1: + self.end_threshold: float = end_threshold + self.use_end_threshold = True + _logger.info(f"Will abort early, at {self.end_threshold}.") + else: + raise ValueError( + f"end_threshold should be between 0 and 1, but is actually {end_threshold}" + ) + + self.ss_n_c = ss_n_c + self.ss_n_s = ss_n_s + self.ss_m_max = ss_m_max + self.ss_target_cost = ss_target_cost + self.ss_level_0_seed = ss_level_0_seed + self.ss_mcmc_seed = ss_mcmc_seed + self.ss_use_adaptive_steps = ss_use_adaptive_steps + self.ss_default_phi_step = ss_default_phi_step + self.ss_default_theta_step = ss_default_theta_step + self.ss_default_r_step = ss_default_r_step + self.ss_default_w_log_step = ss_default_w_log_step + self.ss_default_upper_w_log_step = ss_default_upper_w_log_step + + self.run_count = run_count + + def go(self) -> None: + with open(self.filename, "a", newline="") as outfile: + writer = csv.DictWriter(outfile, fieldnames=self.csv_fields, dialect="unix") + writer.writeheader() + + for run in range(1, self.run_count + 1): + + # Generate the actual dipoles + actual_dipoles = self.actual_model.get_dipoles(self.max_frequency) + + measurements = actual_dipoles.get_dot_measurements( + self.dot_inputs + ) + + + _logger.info(f"Going to work on dipole at {actual_dipoles.dipoles}") + + # define a new seed sequence for each run + + results = [] + _logger.debug("Going to iterate over models now") + for model_count, model in enumerate(self.models_with_names): + _logger.debug(f"Doing model #{model_count}, {model[0]}") + subset_run = deepdog.subset_simulation.SubsetSimulation( + model, + self.dot_inputs, + measurements, + self.ss_n_c, + self.ss_n_s, + self.ss_m_max, + self.ss_target_cost, + self.ss_level_0_seed, + self.ss_mcmc_seed, + self.ss_use_adaptive_steps, + self.ss_default_phi_step, + self.ss_default_theta_step, + self.ss_default_r_step, + self.ss_default_w_log_step, + self.ss_default_upper_w_log_step, + ) + results.append(subset_run.execute()) + + _logger.debug("Done, constructing output now") + row = { + "dipole_moment_1": actual_dipoles.dipoles[0].p, + "dipole_location_1": actual_dipoles.dipoles[0].s, + "dipole_frequency_1": actual_dipoles.dipoles[0].w, + } + for i in range(1, self.n): + try: + current_dipoles = actual_dipoles.dipoles[i] + row[f"dipole_moment_{i+1}"] = current_dipoles.p + row[f"dipole_location_{i+1}"] = current_dipoles.s + row[f"dipole_frequency_{i+1}"] = current_dipoles.w + except IndexError: + _logger.info(f"Not writing anymore, saw end after {i}") + break + + likelihoods: List[float] = [] + + for (name, result) in zip(self.model_names, results): + likelihoods.append(result.over_target_likelihood) + row[f"{name}_likelihood"] = result.over_target_likelihood + + success_weight = sum( + [ + likelihood * prob + for likelihood, prob in zip(likelihoods, self.probabilities) + ] + ) + new_probabilities = [ + likelihood * old_prob / success_weight + for likelihood, old_prob in zip(likelihoods, self.probabilities) + ] + self.probabilities = new_probabilities + for name, probability in zip(self.model_names, self.probabilities): + row[f"{name}_prob"] = probability + _logger.info(row) + + with open(self.filename, "a", newline="") as outfile: + writer = csv.DictWriter( + outfile, fieldnames=self.csv_fields, dialect="unix" + ) + writer.writerow(row) + + if self.use_end_threshold: + max_prob = max(self.probabilities) + if max_prob > self.end_threshold: + _logger.info( + f"Aborting early, because {max_prob} is greater than {self.end_threshold}" + ) + break diff --git a/deepdog/subset_simulation/__init__.py b/deepdog/subset_simulation/__init__.py new file mode 100644 index 0000000..68b2fef --- /dev/null +++ b/deepdog/subset_simulation/__init__.py @@ -0,0 +1,3 @@ +from deepdog.subset_simulation.subset_simulation_impl import SubsetSimulation + +__all__ = ["SubsetSimulation"] diff --git a/deepdog/subset_simulation/subset_simulation_impl.py b/deepdog/subset_simulation/subset_simulation_impl.py new file mode 100644 index 0000000..377ce43 --- /dev/null +++ b/deepdog/subset_simulation/subset_simulation_impl.py @@ -0,0 +1,310 @@ +import logging +import numpy +import pdme.measurement +import pdme.measurement.input_types +import pdme.subspace_simulation +import bisect +from typing import Sequence, Tuple, Optional + +from dataclasses import dataclass + +_logger = logging.getLogger(__name__) + + +@dataclass +class SubsetSimulationResult: + probs_list: Sequence[Tuple] + over_target_cost: Optional[float] + over_target_likelihood: Optional[float] + under_target_cost: Optional[float] + under_target_likelihood: Optional[float] + + +class SubsetSimulation: + def __init__( + self, + model_name_pair, + dot_inputs, + actual_measurements: Sequence[pdme.measurement.DotMeasurement], + n_c: int, + n_s: int, + m_max: int, + target_cost: Optional[float] = None, + level_0_seed: int = 200, + mcmc_seed: int = 20, + use_adaptive_steps=True, + default_phi_step=0.01, + default_theta_step=0.01, + default_r_step=0.01, + default_w_log_step=0.01, + default_upper_w_log_step=4, + ): + name, model = model_name_pair + self.model_name = name + self.model = model + _logger.info(f"got model {self.model_name}") + + self.dot_inputs_array = pdme.measurement.input_types.dot_inputs_to_array( + dot_inputs + ) + # _logger.debug(f"actual measurements: {actual_measurements}") + self.actual_measurement_array = numpy.array([m.v for m in actual_measurements]) + + def cost_function_to_use(dipoles_to_test): + return pdme.subspace_simulation.proportional_costs_vs_actual_measurement( + self.dot_inputs_array, self.actual_measurement_array, dipoles_to_test + ) + + self.cost_function_to_use = cost_function_to_use + + self.n_c = n_c + self.n_s = n_s + self.m_max = m_max + + self.level_0_seed = level_0_seed + self.mcmc_seed = mcmc_seed + + self.use_adaptive_steps = use_adaptive_steps + self.default_phi_step = default_phi_step + self.default_theta_step = default_theta_step + self.default_r_step = default_r_step + self.default_w_log_step = default_w_log_step + self.default_upper_w_log_step = default_upper_w_log_step + + _logger.info(f"using params:") + _logger.info(f"\tn_c: {self.n_c}") + _logger.info(f"\tn_s: {self.n_s}") + _logger.info(f"\tm: {self.m_max}") + _logger.info(f"let's do level 0...") + + self.target_cost = target_cost + _logger.info(f"will stop at target cost {target_cost}") + + def execute(self) -> SubsetSimulationResult: + + probs_list = [] + + sample_dipoles = self.model.get_monte_carlo_dipole_inputs( + self.n_c * self.n_s, + -1, + rng_to_use=numpy.random.default_rng(self.level_0_seed), + ) + # _logger.debug(sample_dipoles) + # _logger.debug(sample_dipoles.shape) + costs = self.cost_function_to_use(sample_dipoles) + + _logger.debug(f"costs: {costs}") + sorted_indexes = costs.argsort()[::-1] + + _logger.debug(costs[sorted_indexes]) + _logger.debug(sample_dipoles[sorted_indexes]) + + sorted_costs = costs[sorted_indexes] + sorted_dipoles = sample_dipoles[sorted_indexes] + + threshold_cost = sorted_costs[-self.n_c] + + all_dipoles = numpy.array( + [ + pdme.subspace_simulation.sort_array_of_dipoles_by_frequency(samp) + for samp in sorted_dipoles + ] + ) + all_chains = list(zip(sorted_costs, all_dipoles)) + + mcmc_rng = numpy.random.default_rng(self.mcmc_seed) + + for i in range(self.m_max): + next_seeds = all_chains[-self.n_c :] + + for cost_index, cost_chain in enumerate(all_chains[: -self.n_c]): + probs_list.append( + ( + ((self.n_c * self.n_s - cost_index) / (self.n_c * self.n_s)) + / (self.n_s ** (i)), + cost_chain[0], + i + 1, + ) + ) + + next_seeds_as_array = numpy.array([s for _, s in next_seeds]) + + stdevs = self.get_stdevs_from_arrays(next_seeds_as_array) + _logger.info(f"got stdevs: {stdevs.stdevs}") + + all_chains = [] + for c, s in next_seeds: + # chain = mcmc(s, threshold_cost, n_s, model, dot_inputs_array, actual_measurement_array, mcmc_rng, curr_cost=c, stdevs=stdevs) + # until new version gotta do + chain = self.model.get_mcmc_chain( + s, + self.cost_function_to_use, + self.n_s, + threshold_cost, + stdevs, + initial_cost=c, + rng_arg=mcmc_rng, + ) + for cost, chained in chain: + try: + filtered_cost = cost[0] + except IndexError: + filtered_cost = cost + all_chains.append((filtered_cost, chained)) + + # _logger.debug(all_chains) + + all_chains.sort(key=lambda c: c[0], reverse=True) + + threshold_cost = all_chains[-self.n_c][0] + _logger.info( + f"current threshold cost: {threshold_cost}, at P = (1 / {self.n_s})^{i + 1}" + ) + if (self.target_cost is not None) and (threshold_cost < self.target_cost): + _logger.info( + f"got a threshold cost {threshold_cost}, less than {self.target_cost}. will leave early" + ) + + cost_list = [c[0] for c in all_chains] + over_index = reverse_bisect_right(cost_list, self.target_cost) + + shorter_probs_list = [] + for cost_index, cost_chain in enumerate(all_chains): + probs_list.append( + ( + ((self.n_c * self.n_s - cost_index) / (self.n_c * self.n_s)) + / (self.n_s ** (i)), + cost_chain[0], + i + 1, + ) + ) + shorter_probs_list.append( + ( + cost_chain[0], + ((self.n_c * self.n_s - cost_index) / (self.n_c * self.n_s)) + / (self.n_s ** (i)), + ) + ) + # _logger.info(shorter_probs_list) + result = SubsetSimulationResult( + probs_list=probs_list, + over_target_cost=shorter_probs_list[over_index - 1][0], + over_target_likelihood=shorter_probs_list[over_index - 1][1], + under_target_cost=shorter_probs_list[over_index][0], + under_target_likelihood=shorter_probs_list[over_index][1], + ) + return result + + # _logger.debug([c[0] for c in all_chains[-n_c:]]) + _logger.info(f"doing level {i + 1}") + + for cost_index, cost_chain in enumerate(all_chains): + probs_list.append( + ( + ((self.n_c * self.n_s - cost_index) / (self.n_c * self.n_s)) + / (self.n_s ** (self.m_max)), + cost_chain[0], + self.m_max + 1, + ) + ) + threshold_cost = all_chains[-self.n_c][0] + _logger.info( + f"final threshold cost: {threshold_cost}, at P = (1 / {self.n_s})^{self.m_max + 1}" + ) + for a in all_chains[-10:]: + _logger.info(a) + # for prob, prob_cost in probs_list: + # _logger.info(f"\t{prob}: {prob_cost}") + probs_list.sort(key=lambda c: c[0], reverse=True) + result = SubsetSimulationResult( + probs_list=probs_list, + over_target_cost=None, + over_target_likelihood=None, + under_target_cost=None, + under_target_likelihood=None, + ) + return result + + def get_stdevs_from_arrays( + self, array + ) -> pdme.subspace_simulation.MCMCStandardDeviation: + # stdevs = get_stdevs_from_arrays(next_seeds_as_array, model) + if self.use_adaptive_steps: + + stdev_array = [] + count = array.shape[1] + for dipole_index in range(count): + selected = array[:, dipole_index] + pxs = selected[:, 0] + pys = selected[:, 1] + pzs = selected[:, 2] + thetas = numpy.arccos(pzs / self.model.pfixed) + phis = numpy.arctan2(pys, pxs) + + rstdevs = numpy.maximum( + numpy.std(selected, axis=0)[3:6], + self.default_r_step / (self.n_s * 10), + ) + frequency_stdevs = numpy.minimum( + numpy.maximum( + numpy.std(numpy.log(selected[:, -1])), + self.default_w_log_step / (self.n_s * 10), + ), + self.default_upper_w_log_step, + ) + stdev_array.append( + pdme.subspace_simulation.DipoleStandardDeviation( + p_theta_step=max( + numpy.std(thetas), self.default_theta_step / (self.n_s * 10) + ), + p_phi_step=max( + numpy.std(phis), self.default_phi_step / (self.n_s * 10) + ), + rx_step=rstdevs[0], + ry_step=rstdevs[1], + rz_step=rstdevs[2], + w_log_step=frequency_stdevs, + ) + ) + else: + default_stdev = pdme.subspace_simulation.DipoleStandardDeviation( + self.default_phi_step, + self.default_theta_step, + self.default_r_step, + self.default_r_step, + self.default_r_step, + self.default_w_log_step, + ) + stdev_array = [default_stdev] + stdevs = pdme.subspace_simulation.MCMCStandardDeviation(stdev_array) + return stdevs + + +def reverse_bisect_right(a, x, lo=0, hi=None): + """Return the index where to insert item x in list a, assuming a is sorted in descending order. + + The return value i is such that all e in a[:i] have e >= x, and all e in + a[i:] have e < x. So if x already appears in the list, a.insert(x) will + insert just after the rightmost x already there. + + Optional args lo (default 0) and hi (default len(a)) bound the + slice of a to be searched. + + Essentially, the function returns number of elements in a which are >= than x. + >>> a = [8, 6, 5, 4, 2] + >>> reverse_bisect_right(a, 5) + 3 + >>> a[:reverse_bisect_right(a, 5)] + [8, 6, 5] + """ + if lo < 0: + raise ValueError("lo must be non-negative") + if hi is None: + hi = len(a) + while lo < hi: + mid = (lo + hi) // 2 + if x > a[mid]: + hi = mid + else: + lo = mid + 1 + return lo diff --git a/poetry.lock b/poetry.lock index 42ad5ec..3fa9092 100644 --- a/poetry.lock +++ b/poetry.lock @@ -360,7 +360,7 @@ python-versions = ">=3.7" [[package]] name = "pdme" -version = "0.8.9" +version = "0.9.1" description = "Python dipole model evaluator" category = "main" optional = false @@ -730,7 +730,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "flake8 (<5)", "pytest-co [metadata] lock-version = "1.1" python-versions = ">=3.8.1,<3.10" -content-hash = "7a708c21744e80b6b5e94f7ef2d5b58f9539179d2c0dd1d9f17b3daa4151d52f" +content-hash = "0161af7edf18c16819f1ce083ab491c17c9809f2770219725131451b1a16a970" [metadata.files] black = [] diff --git a/pyproject.toml b/pyproject.toml index 6f375fc..f252cbd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ authors = ["Deepak Mallubhotla "] [tool.poetry.dependencies] python = ">=3.8.1,<3.10" -pdme = "^0.8.9" +pdme = "^0.9.1" numpy = "1.22.3" scipy = "1.10" From fccf50eb274e2debc88a6f936fb03f2c7ed0c64b Mon Sep 17 00:00:00 2001 From: Deepak Mallubhotla Date: Mon, 24 Jul 2023 01:55:37 -0500 Subject: [PATCH 3/4] fmt: formatting improvements --- deepdog/__init__.py | 1 + deepdog/bayes_run_with_ss.py | 10 +++------- deepdog/subset_simulation/subset_simulation_impl.py | 5 ++--- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/deepdog/__init__.py b/deepdog/__init__.py index 5db1f84..3ca9496 100644 --- a/deepdog/__init__.py +++ b/deepdog/__init__.py @@ -6,6 +6,7 @@ from deepdog.real_spectrum_run import RealSpectrumRun from deepdog.temp_aware_real_spectrum_run import TempAwareRealSpectrumRun from deepdog.bayes_run_with_ss import BayesRunWithSubspaceSimulation + def get_version(): return __version__ diff --git a/deepdog/bayes_run_with_ss.py b/deepdog/bayes_run_with_ss.py index ad73e00..a1aa688 100644 --- a/deepdog/bayes_run_with_ss.py +++ b/deepdog/bayes_run_with_ss.py @@ -8,7 +8,6 @@ import pdme.util.fast_nonlocal_spectrum from typing import Sequence, Tuple, List, Optional import datetime import csv -import multiprocessing import logging import numpy import numpy.typing @@ -55,7 +54,7 @@ class BayesRunWithSubspaceSimulation: filename_slug: str, max_frequency: float = 20, end_threshold: float = None, - run_count = 100, + run_count=100, chunksize: int = CHUNKSIZE, ss_n_c: int = 500, ss_n_s: int = 100, @@ -119,7 +118,7 @@ class BayesRunWithSubspaceSimulation: raise ValueError( f"end_threshold should be between 0 and 1, but is actually {end_threshold}" ) - + self.ss_n_c = ss_n_c self.ss_n_s = ss_n_s self.ss_m_max = ss_m_max @@ -145,10 +144,7 @@ class BayesRunWithSubspaceSimulation: # Generate the actual dipoles actual_dipoles = self.actual_model.get_dipoles(self.max_frequency) - measurements = actual_dipoles.get_dot_measurements( - self.dot_inputs - ) - + measurements = actual_dipoles.get_dot_measurements(self.dot_inputs) _logger.info(f"Going to work on dipole at {actual_dipoles.dipoles}") diff --git a/deepdog/subset_simulation/subset_simulation_impl.py b/deepdog/subset_simulation/subset_simulation_impl.py index 377ce43..029162a 100644 --- a/deepdog/subset_simulation/subset_simulation_impl.py +++ b/deepdog/subset_simulation/subset_simulation_impl.py @@ -3,7 +3,6 @@ import numpy import pdme.measurement import pdme.measurement.input_types import pdme.subspace_simulation -import bisect from typing import Sequence, Tuple, Optional from dataclasses import dataclass @@ -71,11 +70,11 @@ class SubsetSimulation: self.default_w_log_step = default_w_log_step self.default_upper_w_log_step = default_upper_w_log_step - _logger.info(f"using params:") + _logger.info("using params:") _logger.info(f"\tn_c: {self.n_c}") _logger.info(f"\tn_s: {self.n_s}") _logger.info(f"\tm: {self.m_max}") - _logger.info(f"let's do level 0...") + _logger.info("let's do level 0...") self.target_cost = target_cost _logger.info(f"will stop at target cost {target_cost}") From d6e6876a79b520caea17ace6cc1ea63a824bdcc6 Mon Sep 17 00:00:00 2001 From: Deepak Mallubhotla Date: Mon, 24 Jul 2023 01:59:07 -0500 Subject: [PATCH 4/4] fmt: fixes some linting issues --- deepdog/bayes_run_with_ss.py | 3 +++ deepdog/subset_simulation/subset_simulation_impl.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/deepdog/bayes_run_with_ss.py b/deepdog/bayes_run_with_ss.py index a1aa688..f8f1c17 100644 --- a/deepdog/bayes_run_with_ss.py +++ b/deepdog/bayes_run_with_ss.py @@ -192,6 +192,9 @@ class BayesRunWithSubspaceSimulation: likelihoods: List[float] = [] for (name, result) in zip(self.model_names, results): + if result.over_target_likelihood is None: + _logger.error("got a none result, bye") + return likelihoods.append(result.over_target_likelihood) row[f"{name}_likelihood"] = result.over_target_likelihood diff --git a/deepdog/subset_simulation/subset_simulation_impl.py b/deepdog/subset_simulation/subset_simulation_impl.py index 029162a..c5c0ea7 100644 --- a/deepdog/subset_simulation/subset_simulation_impl.py +++ b/deepdog/subset_simulation/subset_simulation_impl.py @@ -114,7 +114,7 @@ class SubsetSimulation: mcmc_rng = numpy.random.default_rng(self.mcmc_seed) for i in range(self.m_max): - next_seeds = all_chains[-self.n_c :] + next_seeds = all_chains[-self.n_c:] for cost_index, cost_chain in enumerate(all_chains[: -self.n_c]): probs_list.append(