From ad521ba472066d4ada23092a747a46e31a9e306e Mon Sep 17 00:00:00 2001
From: Deepak Mallubhotla <dmallubhotla+github@gmail.com>
Date: Sun, 23 Jul 2023 18:45:53 -0500
Subject: [PATCH 1/4] deps: upgrades pdme version to use mcmc code

---
 poetry.lock    | 113 ++++++++++---------------------------------------
 pyproject.toml |   4 +-
 2 files changed, 24 insertions(+), 93 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 55cf0c6..42ad5ec 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -94,7 +94,7 @@ python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7
 
 [[package]]
 name = "coverage"
-version = "7.2.3"
+version = "7.2.7"
 description = "Code coverage measurement for Python"
 category = "dev"
 optional = false
@@ -360,11 +360,11 @@ python-versions = ">=3.7"
 
 [[package]]
 name = "pdme"
-version = "0.8.8"
+version = "0.8.9"
 description = "Python dipole model evaluator"
 category = "main"
 optional = false
-python-versions = ">=3.8,<3.10"
+python-versions = ">=3.8.1,<3.10"
 
 [package.dependencies]
 numpy = ">=1.22.3,<2.0.0"
@@ -729,8 +729,8 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "flake8 (<5)", "pytest-co
 
 [metadata]
 lock-version = "1.1"
-python-versions = "^3.8,<3.10"
-content-hash = "d32b74325a18dc187501980f37d128a2a07d7bb0e4ea2c5cb14cf14f8b7a0222"
+python-versions = ">=3.8.1,<3.10"
+content-hash = "7a708c21744e80b6b5e94f7ef2d5b58f9539179d2c0dd1d9f17b3daa4151d52f"
 
 [metadata.files]
 black = []
@@ -738,10 +738,7 @@ bleach = []
 certifi = []
 cffi = []
 charset-normalizer = []
-click = [
-    {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"},
-    {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"},
-]
+click = []
 click-log = []
 colorama = []
 coverage = []
@@ -749,10 +746,7 @@ cryptography = []
 docutils = []
 dotty-dict = []
 exceptiongroup = []
-flake8 = [
-    {file = "flake8-4.0.1-py2.py3-none-any.whl", hash = "sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d"},
-    {file = "flake8-4.0.1.tar.gz", hash = "sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d"},
-]
+flake8 = []
 gitdb = []
 gitpython = []
 idna = []
@@ -763,103 +757,40 @@ invoke = []
 "jaraco.classes" = []
 jeepney = []
 keyring = []
-mccabe = [
-    {file = "mccabe-0.6.1-py2.py3-none-any.whl", hash = "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42"},
-    {file = "mccabe-0.6.1.tar.gz", hash = "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"},
-]
+mccabe = []
 more-itertools = []
 mypy = []
 mypy-extensions = []
-numpy = [
-    {file = "numpy-1.22.3-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:92bfa69cfbdf7dfc3040978ad09a48091143cffb778ec3b03fa170c494118d75"},
-    {file = "numpy-1.22.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8251ed96f38b47b4295b1ae51631de7ffa8260b5b087808ef09a39a9d66c97ab"},
-    {file = "numpy-1.22.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48a3aecd3b997bf452a2dedb11f4e79bc5bfd21a1d4cc760e703c31d57c84b3e"},
-    {file = "numpy-1.22.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3bae1a2ed00e90b3ba5f7bd0a7c7999b55d609e0c54ceb2b076a25e345fa9f4"},
-    {file = "numpy-1.22.3-cp310-cp310-win32.whl", hash = "sha256:f950f8845b480cffe522913d35567e29dd381b0dc7e4ce6a4a9f9156417d2430"},
-    {file = "numpy-1.22.3-cp310-cp310-win_amd64.whl", hash = "sha256:08d9b008d0156c70dc392bb3ab3abb6e7a711383c3247b410b39962263576cd4"},
-    {file = "numpy-1.22.3-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:201b4d0552831f7250a08d3b38de0d989d6f6e4658b709a02a73c524ccc6ffce"},
-    {file = "numpy-1.22.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f8c1f39caad2c896bc0018f699882b345b2a63708008be29b1f355ebf6f933fe"},
-    {file = "numpy-1.22.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:568dfd16224abddafb1cbcce2ff14f522abe037268514dd7e42c6776a1c3f8e5"},
-    {file = "numpy-1.22.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ca688e1b9b95d80250bca34b11a05e389b1420d00e87a0d12dc45f131f704a1"},
-    {file = "numpy-1.22.3-cp38-cp38-win32.whl", hash = "sha256:e7927a589df200c5e23c57970bafbd0cd322459aa7b1ff73b7c2e84d6e3eae62"},
-    {file = "numpy-1.22.3-cp38-cp38-win_amd64.whl", hash = "sha256:07a8c89a04997625236c5ecb7afe35a02af3896c8aa01890a849913a2309c676"},
-    {file = "numpy-1.22.3-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:2c10a93606e0b4b95c9b04b77dc349b398fdfbda382d2a39ba5a822f669a0123"},
-    {file = "numpy-1.22.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fade0d4f4d292b6f39951b6836d7a3c7ef5b2347f3c420cd9820a1d90d794802"},
-    {file = "numpy-1.22.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bfb1bb598e8229c2d5d48db1860bcf4311337864ea3efdbe1171fb0c5da515d"},
-    {file = "numpy-1.22.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97098b95aa4e418529099c26558eeb8486e66bd1e53a6b606d684d0c3616b168"},
-    {file = "numpy-1.22.3-cp39-cp39-win32.whl", hash = "sha256:fdf3c08bce27132395d3c3ba1503cac12e17282358cb4bddc25cc46b0aca07aa"},
-    {file = "numpy-1.22.3-cp39-cp39-win_amd64.whl", hash = "sha256:639b54cdf6aa4f82fe37ebf70401bbb74b8508fddcf4797f9fe59615b8c5813a"},
-    {file = "numpy-1.22.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c34ea7e9d13a70bf2ab64a2532fe149a9aced424cd05a2c4ba662fd989e3e45f"},
-    {file = "numpy-1.22.3.zip", hash = "sha256:dbc7601a3b7472d559dc7b933b18b4b66f9aa7452c120e87dfb33d02008c8a18"},
-]
+numpy = []
 packaging = []
 pathspec = []
 pdme = []
 pkginfo = []
 platformdirs = []
-pluggy = [
-    {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
-    {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
-]
-pycodestyle = [
-    {file = "pycodestyle-2.8.0-py2.py3-none-any.whl", hash = "sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20"},
-    {file = "pycodestyle-2.8.0.tar.gz", hash = "sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f"},
-]
-pycparser = [
-    {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"},
-    {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"},
-]
-pyflakes = [
-    {file = "pyflakes-2.4.0-py2.py3-none-any.whl", hash = "sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e"},
-    {file = "pyflakes-2.4.0.tar.gz", hash = "sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c"},
-]
+pluggy = []
+pycodestyle = []
+pycparser = []
+pyflakes = []
 pygments = []
 pytest = []
-pytest-cov = [
-    {file = "pytest-cov-3.0.0.tar.gz", hash = "sha256:e7f0f5b1617d2210a2cabc266dfe2f4c75a8d32fb89eafb7ad9d06f6d076d470"},
-    {file = "pytest_cov-3.0.0-py3-none-any.whl", hash = "sha256:578d5d15ac4a25e5f961c938b85a05b09fdaae9deef3bb6de9a6e766622ca7a6"},
-]
+pytest-cov = []
 python-gitlab = []
 python-semantic-release = []
-pywin32-ctypes = [
-    {file = "pywin32-ctypes-0.2.0.tar.gz", hash = "sha256:24ffc3b341d457d48e8922352130cf2644024a4ff09762a2261fd34c36ee5942"},
-    {file = "pywin32_ctypes-0.2.0-py2.py3-none-any.whl", hash = "sha256:9dc2d991b3479cc2df15930958b674a48a227d5361d413827a4cfd0b5876fc98"},
-]
+pywin32-ctypes = []
 readme-renderer = []
 requests = []
 requests-toolbelt = []
-rfc3986 = [
-    {file = "rfc3986-2.0.0-py2.py3-none-any.whl", hash = "sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd"},
-    {file = "rfc3986-2.0.0.tar.gz", hash = "sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c"},
-]
+rfc3986 = []
 scipy = []
 secretstorage = []
-semver = [
-    {file = "semver-2.13.0-py2.py3-none-any.whl", hash = "sha256:ced8b23dceb22134307c1b8abfa523da14198793d9787ac838e70e29e77458d4"},
-    {file = "semver-2.13.0.tar.gz", hash = "sha256:fa0fe2722ee1c3f57eac478820c3a5ae2f624af8264cbdf9000c980ff7f75e3f"},
-]
-six = [
-    {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
-    {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
-]
-smmap = [
-    {file = "smmap-5.0.0-py3-none-any.whl", hash = "sha256:2aba19d6a040e78d8b09de5c57e96207b09ed71d8e55ce0959eeee6c8e190d94"},
-    {file = "smmap-5.0.0.tar.gz", hash = "sha256:c840e62059cd3be204b0c9c9f74be2c09d5648eddd4580d9314c3ecde0b30936"},
-]
-tomli = [
-    {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
-    {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
-]
+semver = []
+six = []
+smmap = []
+tomli = []
 tomlkit = []
 tqdm = []
-twine = [
-    {file = "twine-3.8.0-py3-none-any.whl", hash = "sha256:d0550fca9dc19f3d5e8eadfce0c227294df0a2a951251a4385797c8a6198b7c8"},
-    {file = "twine-3.8.0.tar.gz", hash = "sha256:8efa52658e0ae770686a13b675569328f1fba9837e5de1867bfe5f46a9aefe19"},
-]
+twine = []
 typing-extensions = []
 urllib3 = []
-webencodings = [
-    {file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"},
-    {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"},
-]
+webencodings = []
 zipp = []
diff --git a/pyproject.toml b/pyproject.toml
index 355f8f2..6f375fc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,8 +5,8 @@ description = ""
 authors = ["Deepak Mallubhotla <dmallubhotla+github@gmail.com>"]
 
 [tool.poetry.dependencies]
-python = "^3.8,<3.10"
-pdme = "^0.8.6"
+python = ">=3.8.1,<3.10"
+pdme = "^0.8.9"
 numpy = "1.22.3"
 scipy = "1.10"
 

From 33cab9ab4179cec13ae9e591a8ffc32df4dda989 Mon Sep 17 00:00:00 2001
From: Deepak Mallubhotla <dmallubhotla+github@gmail.com>
Date: Mon, 24 Jul 2023 01:50:56 -0500
Subject: [PATCH 2/4] feat: adds subset simulation stuff

---
 deepdog/__init__.py                           |   3 +-
 deepdog/bayes_run_with_ss.py                  | 229 +++++++++++++
 deepdog/subset_simulation/__init__.py         |   3 +
 .../subset_simulation_impl.py                 | 310 ++++++++++++++++++
 poetry.lock                                   |   4 +-
 pyproject.toml                                |   2 +-
 6 files changed, 547 insertions(+), 4 deletions(-)
 create mode 100644 deepdog/bayes_run_with_ss.py
 create mode 100644 deepdog/subset_simulation/__init__.py
 create mode 100644 deepdog/subset_simulation/subset_simulation_impl.py

diff --git a/deepdog/__init__.py b/deepdog/__init__.py
index 8dc5987..5db1f84 100644
--- a/deepdog/__init__.py
+++ b/deepdog/__init__.py
@@ -4,7 +4,7 @@ from deepdog.bayes_run import BayesRun
 from deepdog.bayes_run_simulpairs import BayesRunSimulPairs
 from deepdog.real_spectrum_run import RealSpectrumRun
 from deepdog.temp_aware_real_spectrum_run import TempAwareRealSpectrumRun
-
+from deepdog.bayes_run_with_ss import BayesRunWithSubspaceSimulation
 
 def get_version():
 	return __version__
@@ -16,6 +16,7 @@ __all__ = [
 	"BayesRunSimulPairs",
 	"RealSpectrumRun",
 	"TempAwareRealSpectrumRun",
+	"BayesRunWithSubspaceSimulation",
 ]
 
 
diff --git a/deepdog/bayes_run_with_ss.py b/deepdog/bayes_run_with_ss.py
new file mode 100644
index 0000000..ad73e00
--- /dev/null
+++ b/deepdog/bayes_run_with_ss.py
@@ -0,0 +1,229 @@
+import deepdog.subset_simulation
+import pdme.inputs
+import pdme.model
+import pdme.measurement.input_types
+import pdme.measurement.oscillating_dipole
+import pdme.util.fast_v_calc
+import pdme.util.fast_nonlocal_spectrum
+from typing import Sequence, Tuple, List, Optional
+import datetime
+import csv
+import multiprocessing
+import logging
+import numpy
+import numpy.typing
+
+
+# TODO: remove hardcode
+CHUNKSIZE = 50
+
+# TODO: It's garbage to have this here duplicated from pdme.
+DotInput = Tuple[numpy.typing.ArrayLike, float]
+
+
+_logger = logging.getLogger(__name__)
+
+
+class BayesRunWithSubspaceSimulation:
+	"""
+	A single Bayes run for a given set of dots.
+
+	Parameters
+	----------
+	dot_inputs : Sequence[DotInput]
+	The dot inputs for this bayes run.
+
+	models_with_names : Sequence[Tuple(str, pdme.model.DipoleModel)]
+	The models to evaluate.
+
+	actual_model : pdme.model.DipoleModel
+	The model which is actually correct.
+
+	filename_slug : str
+	The filename slug to include.
+
+	run_count: int
+	The number of runs to do.
+	"""
+
+	def __init__(
+		self,
+		dot_positions: Sequence[numpy.typing.ArrayLike],
+		frequency_range: Sequence[float],
+		models_with_names: Sequence[Tuple[str, pdme.model.DipoleModel]],
+		actual_model: pdme.model.DipoleModel,
+		filename_slug: str,
+		max_frequency: float = 20,
+		end_threshold: float = None,
+		run_count = 100,
+		chunksize: int = CHUNKSIZE,
+		ss_n_c: int = 500,
+		ss_n_s: int = 100,
+		ss_m_max: int = 15,
+		ss_target_cost: Optional[float] = None,
+		ss_level_0_seed: int = 200,
+		ss_mcmc_seed: int = 20,
+		ss_use_adaptive_steps=True,
+		ss_default_phi_step=0.01,
+		ss_default_theta_step=0.01,
+		ss_default_r_step=0.01,
+		ss_default_w_log_step=0.01,
+		ss_default_upper_w_log_step=4,
+	) -> None:
+		self.dot_inputs = pdme.inputs.inputs_with_frequency_range(
+			dot_positions, frequency_range
+		)
+		self.dot_inputs_array = pdme.measurement.input_types.dot_inputs_to_array(
+			self.dot_inputs
+		)
+
+		self.models_with_names = models_with_names
+		self.models = [model for (_, model) in models_with_names]
+		self.model_names = [name for (name, _) in models_with_names]
+		self.actual_model = actual_model
+
+		self.n: int
+		try:
+			self.n = self.actual_model.n  # type: ignore
+		except AttributeError:
+			self.n = 1
+
+		self.model_count = len(self.models)
+
+		self.csv_fields = []
+		for i in range(self.n):
+			self.csv_fields.extend(
+				[
+					f"dipole_moment_{i+1}",
+					f"dipole_location_{i+1}",
+					f"dipole_frequency_{i+1}",
+				]
+			)
+		self.compensate_zeros = True
+		self.chunksize = chunksize
+		for name in self.model_names:
+			self.csv_fields.extend([f"{name}_likelihood", f"{name}_prob"])
+
+		self.probabilities = [1 / self.model_count] * self.model_count
+
+		timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
+		self.filename = f"{timestamp}-{filename_slug}.bayesrunwithss.csv"
+		self.max_frequency = max_frequency
+
+		if end_threshold is not None:
+			if 0 < end_threshold < 1:
+				self.end_threshold: float = end_threshold
+				self.use_end_threshold = True
+				_logger.info(f"Will abort early, at {self.end_threshold}.")
+			else:
+				raise ValueError(
+					f"end_threshold should be between 0 and 1, but is actually {end_threshold}"
+				)
+		
+		self.ss_n_c = ss_n_c
+		self.ss_n_s = ss_n_s
+		self.ss_m_max = ss_m_max
+		self.ss_target_cost = ss_target_cost
+		self.ss_level_0_seed = ss_level_0_seed
+		self.ss_mcmc_seed = ss_mcmc_seed
+		self.ss_use_adaptive_steps = ss_use_adaptive_steps
+		self.ss_default_phi_step = ss_default_phi_step
+		self.ss_default_theta_step = ss_default_theta_step
+		self.ss_default_r_step = ss_default_r_step
+		self.ss_default_w_log_step = ss_default_w_log_step
+		self.ss_default_upper_w_log_step = ss_default_upper_w_log_step
+
+		self.run_count = run_count
+
+	def go(self) -> None:
+		with open(self.filename, "a", newline="") as outfile:
+			writer = csv.DictWriter(outfile, fieldnames=self.csv_fields, dialect="unix")
+			writer.writeheader()
+
+		for run in range(1, self.run_count + 1):
+
+			# Generate the actual dipoles
+			actual_dipoles = self.actual_model.get_dipoles(self.max_frequency)
+
+			measurements = actual_dipoles.get_dot_measurements(
+				self.dot_inputs
+			)
+
+
+			_logger.info(f"Going to work on dipole at {actual_dipoles.dipoles}")
+
+			# define a new seed sequence for each run
+
+			results = []
+			_logger.debug("Going to iterate over models now")
+			for model_count, model in enumerate(self.models_with_names):
+				_logger.debug(f"Doing model #{model_count}, {model[0]}")
+				subset_run = deepdog.subset_simulation.SubsetSimulation(
+					model,
+					self.dot_inputs,
+					measurements,
+					self.ss_n_c,
+					self.ss_n_s,
+					self.ss_m_max,
+					self.ss_target_cost,
+					self.ss_level_0_seed,
+					self.ss_mcmc_seed,
+					self.ss_use_adaptive_steps,
+					self.ss_default_phi_step,
+					self.ss_default_theta_step,
+					self.ss_default_r_step,
+					self.ss_default_w_log_step,
+					self.ss_default_upper_w_log_step,
+				)
+				results.append(subset_run.execute())
+
+			_logger.debug("Done, constructing output now")
+			row = {
+				"dipole_moment_1": actual_dipoles.dipoles[0].p,
+				"dipole_location_1": actual_dipoles.dipoles[0].s,
+				"dipole_frequency_1": actual_dipoles.dipoles[0].w,
+			}
+			for i in range(1, self.n):
+				try:
+					current_dipoles = actual_dipoles.dipoles[i]
+					row[f"dipole_moment_{i+1}"] = current_dipoles.p
+					row[f"dipole_location_{i+1}"] = current_dipoles.s
+					row[f"dipole_frequency_{i+1}"] = current_dipoles.w
+				except IndexError:
+					_logger.info(f"Not writing anymore, saw end after {i}")
+					break
+
+			likelihoods: List[float] = []
+
+			for (name, result) in zip(self.model_names, results):
+				likelihoods.append(result.over_target_likelihood)
+				row[f"{name}_likelihood"] = result.over_target_likelihood
+
+			success_weight = sum(
+				[
+					likelihood * prob
+					for likelihood, prob in zip(likelihoods, self.probabilities)
+				]
+			)
+			new_probabilities = [
+				likelihood * old_prob / success_weight
+				for likelihood, old_prob in zip(likelihoods, self.probabilities)
+			]
+			self.probabilities = new_probabilities
+			for name, probability in zip(self.model_names, self.probabilities):
+				row[f"{name}_prob"] = probability
+			_logger.info(row)
+
+			with open(self.filename, "a", newline="") as outfile:
+				writer = csv.DictWriter(
+					outfile, fieldnames=self.csv_fields, dialect="unix"
+				)
+				writer.writerow(row)
+
+			if self.use_end_threshold:
+				max_prob = max(self.probabilities)
+				if max_prob > self.end_threshold:
+					_logger.info(
+						f"Aborting early, because {max_prob} is greater than {self.end_threshold}"
+					)
+					break
diff --git a/deepdog/subset_simulation/__init__.py b/deepdog/subset_simulation/__init__.py
new file mode 100644
index 0000000..68b2fef
--- /dev/null
+++ b/deepdog/subset_simulation/__init__.py
@@ -0,0 +1,3 @@
+from deepdog.subset_simulation.subset_simulation_impl import SubsetSimulation
+
+__all__ = ["SubsetSimulation"]
diff --git a/deepdog/subset_simulation/subset_simulation_impl.py b/deepdog/subset_simulation/subset_simulation_impl.py
new file mode 100644
index 0000000..377ce43
--- /dev/null
+++ b/deepdog/subset_simulation/subset_simulation_impl.py
@@ -0,0 +1,310 @@
+import logging
+import numpy
+import pdme.measurement
+import pdme.measurement.input_types
+import pdme.subspace_simulation
+import bisect
+from typing import Sequence, Tuple, Optional
+
+from dataclasses import dataclass
+
+_logger = logging.getLogger(__name__)
+
+
+@dataclass
+class SubsetSimulationResult:
+	probs_list: Sequence[Tuple]
+	over_target_cost: Optional[float]
+	over_target_likelihood: Optional[float]
+	under_target_cost: Optional[float]
+	under_target_likelihood: Optional[float]
+
+
+class SubsetSimulation:
+	def __init__(
+		self,
+		model_name_pair,
+		dot_inputs,
+		actual_measurements: Sequence[pdme.measurement.DotMeasurement],
+		n_c: int,
+		n_s: int,
+		m_max: int,
+		target_cost: Optional[float] = None,
+		level_0_seed: int = 200,
+		mcmc_seed: int = 20,
+		use_adaptive_steps=True,
+		default_phi_step=0.01,
+		default_theta_step=0.01,
+		default_r_step=0.01,
+		default_w_log_step=0.01,
+		default_upper_w_log_step=4,
+	):
+		name, model = model_name_pair
+		self.model_name = name
+		self.model = model
+		_logger.info(f"got model {self.model_name}")
+
+		self.dot_inputs_array = pdme.measurement.input_types.dot_inputs_to_array(
+			dot_inputs
+		)
+		# _logger.debug(f"actual measurements: {actual_measurements}")
+		self.actual_measurement_array = numpy.array([m.v for m in actual_measurements])
+
+		def cost_function_to_use(dipoles_to_test):
+			return pdme.subspace_simulation.proportional_costs_vs_actual_measurement(
+				self.dot_inputs_array, self.actual_measurement_array, dipoles_to_test
+			)
+
+		self.cost_function_to_use = cost_function_to_use
+
+		self.n_c = n_c
+		self.n_s = n_s
+		self.m_max = m_max
+
+		self.level_0_seed = level_0_seed
+		self.mcmc_seed = mcmc_seed
+
+		self.use_adaptive_steps = use_adaptive_steps
+		self.default_phi_step = default_phi_step
+		self.default_theta_step = default_theta_step
+		self.default_r_step = default_r_step
+		self.default_w_log_step = default_w_log_step
+		self.default_upper_w_log_step = default_upper_w_log_step
+
+		_logger.info(f"using params:")
+		_logger.info(f"\tn_c: {self.n_c}")
+		_logger.info(f"\tn_s: {self.n_s}")
+		_logger.info(f"\tm: {self.m_max}")
+		_logger.info(f"let's do level 0...")
+
+		self.target_cost = target_cost
+		_logger.info(f"will stop at target cost {target_cost}")
+
+	def execute(self) -> SubsetSimulationResult:
+
+		probs_list = []
+
+		sample_dipoles = self.model.get_monte_carlo_dipole_inputs(
+			self.n_c * self.n_s,
+			-1,
+			rng_to_use=numpy.random.default_rng(self.level_0_seed),
+		)
+		# _logger.debug(sample_dipoles)
+		# _logger.debug(sample_dipoles.shape)
+		costs = self.cost_function_to_use(sample_dipoles)
+
+		_logger.debug(f"costs: {costs}")
+		sorted_indexes = costs.argsort()[::-1]
+
+		_logger.debug(costs[sorted_indexes])
+		_logger.debug(sample_dipoles[sorted_indexes])
+
+		sorted_costs = costs[sorted_indexes]
+		sorted_dipoles = sample_dipoles[sorted_indexes]
+
+		threshold_cost = sorted_costs[-self.n_c]
+
+		all_dipoles = numpy.array(
+			[
+				pdme.subspace_simulation.sort_array_of_dipoles_by_frequency(samp)
+				for samp in sorted_dipoles
+			]
+		)
+		all_chains = list(zip(sorted_costs, all_dipoles))
+
+		mcmc_rng = numpy.random.default_rng(self.mcmc_seed)
+
+		for i in range(self.m_max):
+			next_seeds = all_chains[-self.n_c :]
+
+			for cost_index, cost_chain in enumerate(all_chains[: -self.n_c]):
+				probs_list.append(
+					(
+						((self.n_c * self.n_s - cost_index) / (self.n_c * self.n_s))
+						/ (self.n_s ** (i)),
+						cost_chain[0],
+						i + 1,
+					)
+				)
+
+			next_seeds_as_array = numpy.array([s for _, s in next_seeds])
+
+			stdevs = self.get_stdevs_from_arrays(next_seeds_as_array)
+			_logger.info(f"got stdevs: {stdevs.stdevs}")
+
+			all_chains = []
+			for c, s in next_seeds:
+				# chain = mcmc(s, threshold_cost, n_s, model, dot_inputs_array, actual_measurement_array, mcmc_rng, curr_cost=c, stdevs=stdevs)
+				# until new version gotta do
+				chain = self.model.get_mcmc_chain(
+					s,
+					self.cost_function_to_use,
+					self.n_s,
+					threshold_cost,
+					stdevs,
+					initial_cost=c,
+					rng_arg=mcmc_rng,
+				)
+				for cost, chained in chain:
+					try:
+						filtered_cost = cost[0]
+					except IndexError:
+						filtered_cost = cost
+					all_chains.append((filtered_cost, chained))
+
+			# _logger.debug(all_chains)
+
+			all_chains.sort(key=lambda c: c[0], reverse=True)
+
+			threshold_cost = all_chains[-self.n_c][0]
+			_logger.info(
+				f"current threshold cost: {threshold_cost}, at P = (1 / {self.n_s})^{i + 1}"
+			)
+			if (self.target_cost is not None) and (threshold_cost < self.target_cost):
+				_logger.info(
+					f"got a threshold cost {threshold_cost}, less than {self.target_cost}. will leave early"
+				)
+
+				cost_list = [c[0] for c in all_chains]
+				over_index = reverse_bisect_right(cost_list, self.target_cost)
+
+				shorter_probs_list = []
+				for cost_index, cost_chain in enumerate(all_chains):
+					probs_list.append(
+						(
+							((self.n_c * self.n_s - cost_index) / (self.n_c * self.n_s))
+							/ (self.n_s ** (i)),
+							cost_chain[0],
+							i + 1,
+						)
+					)
+					shorter_probs_list.append(
+						(
+							cost_chain[0],
+							((self.n_c * self.n_s - cost_index) / (self.n_c * self.n_s))
+							/ (self.n_s ** (i)),
+						)
+					)
+				# _logger.info(shorter_probs_list)
+				result = SubsetSimulationResult(
+					probs_list=probs_list,
+					over_target_cost=shorter_probs_list[over_index - 1][0],
+					over_target_likelihood=shorter_probs_list[over_index - 1][1],
+					under_target_cost=shorter_probs_list[over_index][0],
+					under_target_likelihood=shorter_probs_list[over_index][1],
+				)
+				return result
+
+			# _logger.debug([c[0] for c in all_chains[-n_c:]])
+			_logger.info(f"doing level {i + 1}")
+
+		for cost_index, cost_chain in enumerate(all_chains):
+			probs_list.append(
+				(
+					((self.n_c * self.n_s - cost_index) / (self.n_c * self.n_s))
+					/ (self.n_s ** (self.m_max)),
+					cost_chain[0],
+					self.m_max + 1,
+				)
+			)
+		threshold_cost = all_chains[-self.n_c][0]
+		_logger.info(
+			f"final threshold cost: {threshold_cost}, at P = (1 / {self.n_s})^{self.m_max + 1}"
+		)
+		for a in all_chains[-10:]:
+			_logger.info(a)
+		# for prob, prob_cost in probs_list:
+		# 	_logger.info(f"\t{prob}: {prob_cost}")
+		probs_list.sort(key=lambda c: c[0], reverse=True)
+		result = SubsetSimulationResult(
+			probs_list=probs_list,
+			over_target_cost=None,
+			over_target_likelihood=None,
+			under_target_cost=None,
+			under_target_likelihood=None,
+		)
+		return result
+
+	def get_stdevs_from_arrays(
+		self, array
+	) -> pdme.subspace_simulation.MCMCStandardDeviation:
+		# stdevs = get_stdevs_from_arrays(next_seeds_as_array, model)
+		if self.use_adaptive_steps:
+
+			stdev_array = []
+			count = array.shape[1]
+			for dipole_index in range(count):
+				selected = array[:, dipole_index]
+				pxs = selected[:, 0]
+				pys = selected[:, 1]
+				pzs = selected[:, 2]
+				thetas = numpy.arccos(pzs / self.model.pfixed)
+				phis = numpy.arctan2(pys, pxs)
+
+				rstdevs = numpy.maximum(
+					numpy.std(selected, axis=0)[3:6],
+					self.default_r_step / (self.n_s * 10),
+				)
+				frequency_stdevs = numpy.minimum(
+					numpy.maximum(
+						numpy.std(numpy.log(selected[:, -1])),
+						self.default_w_log_step / (self.n_s * 10),
+					),
+					self.default_upper_w_log_step,
+				)
+				stdev_array.append(
+					pdme.subspace_simulation.DipoleStandardDeviation(
+						p_theta_step=max(
+							numpy.std(thetas), self.default_theta_step / (self.n_s * 10)
+						),
+						p_phi_step=max(
+							numpy.std(phis), self.default_phi_step / (self.n_s * 10)
+						),
+						rx_step=rstdevs[0],
+						ry_step=rstdevs[1],
+						rz_step=rstdevs[2],
+						w_log_step=frequency_stdevs,
+					)
+				)
+		else:
+			default_stdev = pdme.subspace_simulation.DipoleStandardDeviation(
+				self.default_phi_step,
+				self.default_theta_step,
+				self.default_r_step,
+				self.default_r_step,
+				self.default_r_step,
+				self.default_w_log_step,
+			)
+			stdev_array = [default_stdev]
+		stdevs = pdme.subspace_simulation.MCMCStandardDeviation(stdev_array)
+		return stdevs
+
+
+def reverse_bisect_right(a, x, lo=0, hi=None):
+	"""Return the index where to insert item x in list a, assuming a is sorted in descending order.
+
+	The return value i is such that all e in a[:i] have e >= x, and all e in
+	a[i:] have e < x.  So if x already appears in the list, a.insert(x) will
+	insert just after the rightmost x already there.
+
+	Optional args lo (default 0) and hi (default len(a)) bound the
+	slice of a to be searched.
+
+	Essentially, the function returns number of elements in a which are >= than x.
+	>>> a = [8, 6, 5, 4, 2]
+	>>> reverse_bisect_right(a, 5)
+	3
+	>>> a[:reverse_bisect_right(a, 5)]
+	[8, 6, 5]
+	"""
+	if lo < 0:
+		raise ValueError("lo must be non-negative")
+	if hi is None:
+		hi = len(a)
+	while lo < hi:
+		mid = (lo + hi) // 2
+		if x > a[mid]:
+			hi = mid
+		else:
+			lo = mid + 1
+	return lo
diff --git a/poetry.lock b/poetry.lock
index 42ad5ec..3fa9092 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -360,7 +360,7 @@ python-versions = ">=3.7"
 
 [[package]]
 name = "pdme"
-version = "0.8.9"
+version = "0.9.1"
 description = "Python dipole model evaluator"
 category = "main"
 optional = false
@@ -730,7 +730,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "flake8 (<5)", "pytest-co
 [metadata]
 lock-version = "1.1"
 python-versions = ">=3.8.1,<3.10"
-content-hash = "7a708c21744e80b6b5e94f7ef2d5b58f9539179d2c0dd1d9f17b3daa4151d52f"
+content-hash = "0161af7edf18c16819f1ce083ab491c17c9809f2770219725131451b1a16a970"
 
 [metadata.files]
 black = []
diff --git a/pyproject.toml b/pyproject.toml
index 6f375fc..f252cbd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ authors = ["Deepak Mallubhotla <dmallubhotla+github@gmail.com>"]
 
 [tool.poetry.dependencies]
 python = ">=3.8.1,<3.10"
-pdme = "^0.8.9"
+pdme = "^0.9.1"
 numpy = "1.22.3"
 scipy = "1.10"
 

From fccf50eb274e2debc88a6f936fb03f2c7ed0c64b Mon Sep 17 00:00:00 2001
From: Deepak Mallubhotla <dmallubhotla+github@gmail.com>
Date: Mon, 24 Jul 2023 01:55:37 -0500
Subject: [PATCH 3/4] fmt: formatting improvements

---
 deepdog/__init__.py                                 |  1 +
 deepdog/bayes_run_with_ss.py                        | 10 +++-------
 deepdog/subset_simulation/subset_simulation_impl.py |  5 ++---
 3 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/deepdog/__init__.py b/deepdog/__init__.py
index 5db1f84..3ca9496 100644
--- a/deepdog/__init__.py
+++ b/deepdog/__init__.py
@@ -6,6 +6,7 @@ from deepdog.real_spectrum_run import RealSpectrumRun
 from deepdog.temp_aware_real_spectrum_run import TempAwareRealSpectrumRun
 from deepdog.bayes_run_with_ss import BayesRunWithSubspaceSimulation
 
+
 def get_version():
 	return __version__
 
diff --git a/deepdog/bayes_run_with_ss.py b/deepdog/bayes_run_with_ss.py
index ad73e00..a1aa688 100644
--- a/deepdog/bayes_run_with_ss.py
+++ b/deepdog/bayes_run_with_ss.py
@@ -8,7 +8,6 @@ import pdme.util.fast_nonlocal_spectrum
 from typing import Sequence, Tuple, List, Optional
 import datetime
 import csv
-import multiprocessing
 import logging
 import numpy
 import numpy.typing
@@ -55,7 +54,7 @@ class BayesRunWithSubspaceSimulation:
 		filename_slug: str,
 		max_frequency: float = 20,
 		end_threshold: float = None,
-		run_count = 100,
+		run_count=100,
 		chunksize: int = CHUNKSIZE,
 		ss_n_c: int = 500,
 		ss_n_s: int = 100,
@@ -119,7 +118,7 @@ class BayesRunWithSubspaceSimulation:
 				raise ValueError(
 					f"end_threshold should be between 0 and 1, but is actually {end_threshold}"
 				)
-		
+
 		self.ss_n_c = ss_n_c
 		self.ss_n_s = ss_n_s
 		self.ss_m_max = ss_m_max
@@ -145,10 +144,7 @@ class BayesRunWithSubspaceSimulation:
 			# Generate the actual dipoles
 			actual_dipoles = self.actual_model.get_dipoles(self.max_frequency)
 
-			measurements = actual_dipoles.get_dot_measurements(
-				self.dot_inputs
-			)
-
+			measurements = actual_dipoles.get_dot_measurements(self.dot_inputs)
 
 			_logger.info(f"Going to work on dipole at {actual_dipoles.dipoles}")
 
diff --git a/deepdog/subset_simulation/subset_simulation_impl.py b/deepdog/subset_simulation/subset_simulation_impl.py
index 377ce43..029162a 100644
--- a/deepdog/subset_simulation/subset_simulation_impl.py
+++ b/deepdog/subset_simulation/subset_simulation_impl.py
@@ -3,7 +3,6 @@ import numpy
 import pdme.measurement
 import pdme.measurement.input_types
 import pdme.subspace_simulation
-import bisect
 from typing import Sequence, Tuple, Optional
 
 from dataclasses import dataclass
@@ -71,11 +70,11 @@ class SubsetSimulation:
 		self.default_w_log_step = default_w_log_step
 		self.default_upper_w_log_step = default_upper_w_log_step
 
-		_logger.info(f"using params:")
+		_logger.info("using params:")
 		_logger.info(f"\tn_c: {self.n_c}")
 		_logger.info(f"\tn_s: {self.n_s}")
 		_logger.info(f"\tm: {self.m_max}")
-		_logger.info(f"let's do level 0...")
+		_logger.info("let's do level 0...")
 
 		self.target_cost = target_cost
 		_logger.info(f"will stop at target cost {target_cost}")

From d6e6876a79b520caea17ace6cc1ea63a824bdcc6 Mon Sep 17 00:00:00 2001
From: Deepak Mallubhotla <dmallubhotla+github@gmail.com>
Date: Mon, 24 Jul 2023 01:59:07 -0500
Subject: [PATCH 4/4] fmt: fixes some linting issues

---
 deepdog/bayes_run_with_ss.py                        | 3 +++
 deepdog/subset_simulation/subset_simulation_impl.py | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/deepdog/bayes_run_with_ss.py b/deepdog/bayes_run_with_ss.py
index a1aa688..f8f1c17 100644
--- a/deepdog/bayes_run_with_ss.py
+++ b/deepdog/bayes_run_with_ss.py
@@ -192,6 +192,9 @@ class BayesRunWithSubspaceSimulation:
 			likelihoods: List[float] = []
 
 			for (name, result) in zip(self.model_names, results):
+				if result.over_target_likelihood is None:
+					_logger.error("got a none result, bye")
+					return
 				likelihoods.append(result.over_target_likelihood)
 				row[f"{name}_likelihood"] = result.over_target_likelihood
 
diff --git a/deepdog/subset_simulation/subset_simulation_impl.py b/deepdog/subset_simulation/subset_simulation_impl.py
index 029162a..c5c0ea7 100644
--- a/deepdog/subset_simulation/subset_simulation_impl.py
+++ b/deepdog/subset_simulation/subset_simulation_impl.py
@@ -114,7 +114,7 @@ class SubsetSimulation:
 		mcmc_rng = numpy.random.default_rng(self.mcmc_seed)
 
 		for i in range(self.m_max):
-			next_seeds = all_chains[-self.n_c :]
+			next_seeds = all_chains[-self.n_c:]
 
 			for cost_index, cost_chain in enumerate(all_chains[: -self.n_c]):
 				probs_list.append(