initial commit

2024-09-04 14:38:47 -05:00
parent e0aa5f0535
commit 287e7d174b
26 changed files with 2332 additions and 0 deletions

.flake8 Executable file

@@ -0,0 +1,3 @@
[flake8]
ignore = W191, E501, W503, E203
max-line-length = 120

.gitattributes vendored Executable file

@@ -0,0 +1,3 @@
* text=auto
*.py text diff=python

.versionrc Executable file

@@ -0,0 +1,10 @@
{
"bumpFiles": [
{
"filename": "pyproject.toml",
"updater": "scripts/standard-version/pyproject-updater.js"
}
],
"sign": true,
"tag-prefix": ""
}

README.md Executable file

@@ -0,0 +1,28 @@
# kalpa
Needs a `dots.json` and an `indexes.json`; the other JSON files get generated.
# Sources of truth
- `dots.json`
- `indexes.json`
- costs: 10.0 5.0 1.0 0.5 0.1 0.06
- `02-run_gen.sh`
# Ideas
- Can calculate some calibration curves for successes, and maybe some Brier scores, but probably not.
# Variables to change
- count: 1 or 10, also use that for ddog
- Frequency range?
- We may need some additional work to get automatic merging of multiple time series, à la the Connors paper
- Measurement type?
- Use the Ex field for now, and maybe try potential later in an sd4 run
# Procedure
- `run.sh`
- `clean.sh` to clean generated outputs
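For orientation, a minimal editorial sketch (not part of this commit) of loading the two sources of truth; the file contents appear under `dots.json` and `indexes.json` below:

```python
import json
import pathlib

# Load the two source-of-truth files from the repo root.
dots = json.loads(pathlib.Path("dots.json").read_text())
indexes = json.loads(pathlib.Path("indexes.json").read_text())["indexes"]

print([d["label"] for d in dots])  # dot1, dot2, line, triangle1, ...
print(indexes["occupancies"], indexes["magnitudes"], indexes["orientations"])
```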

clean.sh Executable file

@@ -0,0 +1,9 @@
#!/usr/bin/env bash
# remove generated logs and outputs
set -euxo pipefail
rm -f output.log
rm -rf logs
rm -rf out

dots.json Executable file

@@ -0,0 +1,30 @@
[
{
"r": [5, 0, 0],
"label": "dot1"
},
{
"r": [-5, 0, 0],
"label": "dot2"
},
{
"r": [0, 0, 0],
"label": "line"
},
{
"r": [-5, 5, 0],
"label": "triangle1"
},
{
"r": [0, 3.5, 0],
"label": "triangle2"
},
{
"r": [-5, 5, 0.5],
"label": "uprise1"
},
{
"r": [0, 3.5, 0.5],
"label": "uprise2"
}
]

indexes.json Executable file

@@ -0,0 +1,13 @@
{
"indexes": {
"occupancies": [
1, 10
],
"magnitudes": [
1, 2, 3
],
"orientations": [
"fixedxy"
]
}
}

justfile Executable file

@@ -0,0 +1,61 @@
# execute default build
default: fmt
# builds the python module using poetry
# build:
# echo "building..."
# poetry build
# print a message displaying whether nix is being used
checknix:
#!/usr/bin/env bash
set -euxo pipefail
if [[ "${DO_NIX_CUSTOM:=0}" -eq 1 ]]; then
echo "In an interactive nix env."
else
echo "Using poetry as runner, no nix detected."
fi
# # run all tests
# test: fmt
# #!/usr/bin/env bash
# set -euxo pipefail
# if [[ "${DO_NIX_CUSTOM:=0}" -eq 1 ]]; then
# echo "testing, using nix..."
# flake8 deepdog tests
# mypy deepdog
# pytest
# else
# echo "testing..."
# poetry run flake8 deepdog tests
# poetry run mypy deepdog
# poetry run pytest
# fi
# format code
fmt:
#!/usr/bin/env bash
set -euxo pipefail
if [[ "${DO_NIX_CUSTOM:=0}" -eq 1 ]]; then
black .
else
poetry run black .
fi
	find single_dipole_4 -type f -name "*.py" -exec sed -i -e 's/    /\t/g' {} \;
	find bin -type f -name "*.py" -exec sed -i -e 's/    /\t/g' {} \;
	# find tests -type f -name "*.py" -exec sed -i -e 's/    /\t/g' {} \;
# release the app, checking that our working tree is clean and ready for release, optionally takes target version
# release version="":
# #!/usr/bin/env bash
# set -euxo pipefail
# if [[ -n "{{version}}" ]]; then
# ./scripts/release.sh {{version}}
# else
# ./scripts/release.sh
# fi
# htmlcov:
# poetry run pytest --cov-report=html

kalpa/__init__.py Executable file

@@ -0,0 +1,23 @@
from single_dipole_4.read_bin_csv import read_dots_and_binned
from single_dipole_4.common import get_model
from single_dipole_4.config import (
Config,
TantriConfig,
GeneralConfig,
GenerationConfig,
DeepdogConfig,
ReducedModelParams,
MeasurementTypeEnum,
)
__all__ = [
"read_dots_and_binned",
"get_model",
"Config",
"TantriConfig",
"GeneralConfig",
"GenerationConfig",
"DeepdogConfig",
"ReducedModelParams",
"MeasurementTypeEnum",
]

kalpa/common/__init__.py Executable file

@@ -0,0 +1,19 @@
from single_dipole_4.common.model_params import get_model
from single_dipole_4.common.cli_utils import set_up_logging
from single_dipole_4.common.runner_utils import (
new_cd,
tantri_binned_output_name,
tantri_full_output_name,
sorted_bayesruns_name,
merged_coalesced_name,
)
__all__ = [
"get_model",
"set_up_logging",
"new_cd",
"tantri_binned_output_name",
"tantri_full_output_name",
"sorted_bayesruns_name",
"merged_coalesced_name",
]

kalpa/common/cli_utils.py Executable file

@@ -0,0 +1,27 @@
import pathlib
import logging
import single_dipole_4.config
import typing
def set_up_logging(
config: single_dipole_4.config.Config,
log_file: typing.Optional[str],
create_logfile_parents: bool = True,
):
if log_file is None:
handlers = [
logging.StreamHandler(),
]
else:
if create_logfile_parents:
# create any parent directories for the log file if needed.
pathlib.Path(log_file).parent.mkdir(parents=True, exist_ok=True)
handlers = [logging.StreamHandler(), logging.FileHandler(log_file)]
logging.basicConfig(
level=logging.DEBUG,
format=config.general_config.log_pattern,
handlers=handlers,
)
logging.getLogger("pdme").setLevel(logging.ERROR)
logging.captureWarnings(True)

kalpa/common/model_params.py Executable file

@@ -0,0 +1,78 @@
import dataclasses
import typing
import logging
from tantri.dipoles.types import Orientation
import single_dipole_4.config
_logger = logging.getLogger(__name__)
from pdme.model import (
LogSpacedRandomCountMultipleDipoleFixedMagnitudeModel,
LogSpacedRandomCountMultipleDipoleFixedMagnitudeXYModel,
LogSpacedRandomCountMultipleDipoleFixedMagnitudeFixedOrientationModel,
)
def long_orientation_name(orientation: Orientation) -> str:
return {
Orientation.RANDOM: "free",
Orientation.XY: "fixedxy",
Orientation.Z: "fixedz",
}[orientation]
def _fixed_z_model_func(
xmin,
xmax,
ymin,
ymax,
zmin,
zmax,
wexp_min,
wexp_max,
pfixed,
n_max,
prob_occupancy,
):
return LogSpacedRandomCountMultipleDipoleFixedMagnitudeFixedOrientationModel(
xmin,
xmax,
ymin,
ymax,
zmin,
zmax,
wexp_min,
wexp_max,
pfixed,
0,
0,
n_max,
prob_occupancy,
)
def get_model(params: single_dipole_4.config.ReducedModelParams):
model_funcs = {
Orientation.Z: _fixed_z_model_func,
Orientation.RANDOM: LogSpacedRandomCountMultipleDipoleFixedMagnitudeModel,
Orientation.XY: LogSpacedRandomCountMultipleDipoleFixedMagnitudeXYModel,
}
_logger.info(f"Got params that look like {params=}")
_logger.info(f"Got params that look like {params=}")
model = model_funcs[params.orientation](
params.x_min,
params.x_max,
params.y_min,
params.y_max,
params.z_min,
params.z_max,
params.w_log_min,
params.w_log_max,
10**params.log_magnitude,
params.count,
0.99999999,
)
return (
f"geom_{model.xmin}_{model.xmax}_{model.ymin}_{model.ymax}_{model.zmin}_{model.zmax}-magnitude_{params.log_magnitude}-orientation_{long_orientation_name(params.orientation)}-dipole_count_{params.count}",
model,
)
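A hedged usage sketch for `get_model` (not in this commit), following the `single_dipole_4` import convention the package uses and the defaults from `ReducedModelParams` in `config.py`; the printed name follows the format string above:

```python
import single_dipole_4.config
from single_dipole_4.common.model_params import get_model

# Default params: count=1, log_magnitude=2, orientation=RANDOM ("free").
params = single_dipole_4.config.ReducedModelParams()
name, model = get_model(params)
print(name)  # geom_-20_20_-10_10_0_5-magnitude_2-orientation_free-dipole_count_1
```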

kalpa/common/runner_utils.py Executable file

@@ -0,0 +1,43 @@
import logging
import contextlib
import os
import typing
_logger = logging.getLogger(__name__)
@contextlib.contextmanager
def new_cd(x: typing.Union[str, bytes, os.PathLike]):
d = os.getcwd()
_logger.debug(f"Attempting to change dir from {d=} to {x=}")
# This could raise an exception, but it's probably
# best to let it propagate and let the caller
# deal with it, since they requested x
os.chdir(x)
try:
yield
finally:
# This could also raise an exception, but you *really*
# aren't equipped to figure out what went wrong if the
# old working directory can't be restored.
os.chdir(d)
def tantri_full_output_name(tantri_index: int) -> str:
return f"apsd_{tantri_index}.csv"
def tantri_binned_output_name(tantri_index: int) -> str:
return f"binned_apsd_{tantri_index}.csv"
def sorted_bayesruns_name() -> str:
return "sorted-bayesruns"
def merged_coalesced_name() -> str:
return "merged_coalesced.csv"

kalpa/config.py Executable file

@@ -0,0 +1,143 @@
import json
import deepdog.indexify
from dataclasses import dataclass, field
import typing
import tantri.dipoles.types
import pathlib
from enum import Enum
import logging
_logger = logging.getLogger(__name__)
class MeasurementTypeEnum(Enum):
POTENTIAL = "electric-potential"
X_ELECTRIC_FIELD = "x-electric-field"
# Copy over some random constants to see if they're ever reused
@dataclass(frozen=True)
class GeneralConfig:
dots_json_name = "dots.json"
indexes_json_name = "indexes.json"
out_dir_name = "out"
log_pattern = "%(asctime)s | %(process)d | %(levelname)-7s | %(name)s:%(lineno)d | %(message)s"
measurement_type: MeasurementTypeEnum = MeasurementTypeEnum.X_ELECTRIC_FIELD
root_directory: pathlib.Path = pathlib.Path.cwd()
mega_merged_name = "mega_merged_coalesced.csv"
mega_merged_inferenced_name = "mega_merged_coalesced_inferenced.csv"
@dataclass(frozen=True)
class TantriConfig:
index_seed_starter: int = 31415
num_seeds: int = 100
delta_t: float = 0.05
num_iterations: int = 100000
# sample_rate = 10
@dataclass(frozen=True)
class GenerationConfig:
# Interact with indexes.json, probably should be a subset
counts: typing.Sequence[int] = field(default_factory=lambda: [1, 10])
orientations: typing.Sequence[tantri.dipoles.types.Orientation] = field(
default_factory=lambda: [
tantri.dipoles.types.Orientation.RANDOM,
tantri.dipoles.types.Orientation.Z,
tantri.dipoles.types.Orientation.XY,
]
)
num_replicas: int = 3
tantri_configs: typing.Sequence[TantriConfig] = field(
default_factory=lambda: [TantriConfig()]
)
num_bin_time_series: int = 25
bin_log_width: float = 0.25
@dataclass(frozen=True)
class DeepdogConfig:
"""
Class that holds all of the computational parameters
"""
costs_to_try: typing.Sequence[float] = field(default_factory=lambda: [10, 1, 0.1])
target_success: int = 1000
max_monte_carlo_cycles_steps: int = 20
@dataclass(frozen=True)
class Config:
generation_config: GenerationConfig = GenerationConfig()
general_config: GeneralConfig = GeneralConfig()
deepdog_config: DeepdogConfig = DeepdogConfig()
def absify(self, filename: str) -> pathlib.Path:
ret = (self.general_config.root_directory / filename).resolve()
_logger.debug(f"Absifying {filename=}, geting {ret}")
return ret
def get_out_dir_path(self) -> pathlib.Path:
return self.absify(self.general_config.out_dir_name)
def get_dots_json_path(self) -> pathlib.Path:
return self.absify(self.general_config.dots_json_name)
def indexifier(self) -> deepdog.indexify.Indexifier:
with self.absify(self.general_config.indexes_json_name).open(
"r"
) as indexify_json_file:
indexify_spec = json.load(indexify_json_file)
indexify_data = indexify_spec["indexes"]
if "seed_spec" in indexify_spec:
seed_spec = indexify_spec["seed_spec"]
indexify_data[seed_spec["field_name"]] = list(
range(seed_spec["num_seeds"])
)
_logger.info(f"loading indexifier with data {indexify_data=}")
return deepdog.indexify.Indexifier(indexify_data)
@dataclass(frozen=True)
class ReducedModelParams:
"""
	Units are usually 10s of nm for distance; time is in s or Hz as needed; log units are log base 10 of Hz or s values.
"""
x_min: float = -20
x_max: float = 20
y_min: float = -10
y_max: float = 10
z_min: float = 0
z_max: float = 5
w_log_min: float = -5
w_log_max: float = 1
count: int = 1
log_magnitude: float = 2
orientation: tantri.dipoles.types.Orientation = (
tantri.dipoles.types.Orientation.RANDOM
)
def config_dict(self, seed: int) -> typing.Dict[str, typing.Any]:
output_dict = {
"x_min": self.x_min,
"x_max": self.x_max,
"y_min": self.y_min,
"y_max": self.y_max,
"z_min": self.z_min,
"z_max": self.z_max,
"mag": 10**self.log_magnitude,
"w_log_min": self.w_log_min,
"w_log_max": self.w_log_max,
"orientation": self.orientation,
"dipole_count": self.count,
"generation_seed": seed,
}
return output_dict
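A sketch of the `seed_spec` expansion that `indexifier()` performs, mirroring the code above; the spec here is illustrative, not the `indexes.json` in this commit:

```python
indexify_spec = {
    "indexes": {"occupancies": [1, 10], "magnitudes": [1, 2, 3]},
    "seed_spec": {"field_name": "seeds", "num_seeds": 5},
}
indexify_data = indexify_spec["indexes"]
if "seed_spec" in indexify_spec:
    seed_spec = indexify_spec["seed_spec"]
    indexify_data[seed_spec["field_name"]] = list(range(seed_spec["num_seeds"]))

# 2 occupancies * 3 magnitudes * 5 seeds = 30 index combinations.
print(indexify_data)
```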

kalpa/inference_coalesce/__init__.py Executable file

@@ -0,0 +1,3 @@
from single_dipole_4.inference_coalesce.coalescer import Coalescer
__all__ = ["Coalescer"]

kalpa/inference_coalesce/coalescer.py Executable file

@@ -0,0 +1,113 @@
import logging
_logger = logging.getLogger(__name__)
class Keys:
def __init__(self, row):
self.row = row
def actual_key(self):
return (self.row["actual_orientation"], self.row["actual_avg_filled"])
def dot_cost_key(self):
return (self.row["dot_name"], self.row["target_cost"])
def model_key(self):
return (
self.row["orientation"],
self.row["avg_filled"],
self.row["log_magnitude"],
)
def replica_key(self):
return self.row["generation_replica_index"]
def all_keys(self):
return (
self.actual_key(),
self.dot_cost_key(),
self.replica_key(),
self.model_key(),
)
class Coalescer:
def __init__(self, rows, num_replicas: int):
self.rows = rows
# sort into actuals, then dots, then probs
self.actual_dict = {}
for row in self.rows:
keys = Keys(row).all_keys()
_logger.debug(keys)
if keys[0] not in self.actual_dict:
_logger.debug(f"Creating layer 0 for {keys[0]}")
self.actual_dict[keys[0]] = {}
if keys[1] not in self.actual_dict[keys[0]]:
_logger.debug(f"Creating layer 1 for {keys[0]}, {keys[1]}")
self.actual_dict[keys[0]][keys[1]] = {}
if keys[2] not in self.actual_dict[keys[0]][keys[1]]:
_logger.debug(f"Creating layer 2 for {keys[0]}, {keys[1]}, {keys[2]}")
self.actual_dict[keys[0]][keys[1]][keys[2]] = {}
_logger.debug(
f"Adding to {self.actual_dict[keys[0]][keys[1]][keys[2]]} for {keys[3]}"
)
self.actual_dict[keys[0]][keys[1]][keys[2]][keys[3]] = row
self.num_replicas = num_replicas
def coalesce_generations(self, actual_key, dot_key):
_logger.debug(self.actual_dict.keys())
_logger.debug(self.actual_dict[actual_key].keys())
subdict = self.actual_dict[actual_key][dot_key]
_logger.debug(f"subdict keys: {subdict.keys()}")
# TODO hardcoding 3 generations
if self.num_replicas != 3:
raise ValueError(
f"num replicas was {self.num_replicas}, but we've hard coded 3"
)
# generations_keys = ["0", "1", "2"]
# 0th gen is easiest
for model_key, val in subdict["0"].items():
val["coalesced_prob"] = val["prob"]
weight1 = sum(
[
float(subdict["0"][key]["coalesced_prob"])
* float(subdict["1"][key]["prob"])
for key in subdict["1"].keys()
]
)
_logger.debug(weight1)
for model_key, val in subdict["1"].items():
val["coalesced_prob"] = (
float(val["prob"])
* float(subdict["0"][model_key]["coalesced_prob"])
/ weight1
)
weight2 = sum(
[
float(subdict["1"][key]["coalesced_prob"])
* float(subdict["2"][key]["prob"])
for key in subdict["2"].keys()
]
)
_logger.debug(weight2)
for model_key, val in subdict["2"].items():
val["coalesced_prob"] = (
float(val["prob"])
* float(subdict["1"][model_key]["coalesced_prob"])
/ weight2
)
def coalesce_all(self):
for actual_key in self.actual_dict.keys():
for dot_key in self.actual_dict[actual_key].keys():
self.coalesce_generations(actual_key, dot_key)
return self.actual_dict
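In effect `coalesce_generations` chains Bayesian updates across replicas: generation 0 keeps its own probabilities, and each later generation multiplies its probabilities by the previous generation's coalesced probabilities, renormalizing by the corresponding weight. A toy check of that arithmetic (editorial sketch, two models):

```python
# Two models "a" and "b"; per-generation probabilities as in the rows above.
gen0 = {"a": 0.5, "b": 0.5}
gen1 = {"a": 0.8, "b": 0.2}

coalesced0 = dict(gen0)  # generation 0: coalesced_prob == prob

weight1 = sum(coalesced0[k] * gen1[k] for k in gen1)
coalesced1 = {k: gen1[k] * coalesced0[k] / weight1 for k in gen1}
print(coalesced1)  # roughly {'a': 0.8, 'b': 0.2}: a flat prior keeps gen 1 as-is
```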

kalpa/read_bin_csv.py Executable file

@@ -0,0 +1,310 @@
import re
import numpy
import dataclasses
import typing
import json
import pathlib
import logging
import csv
import deepdog.direct_monte_carlo.dmc_filters
import deepdog.direct_monte_carlo.compose_filter
import deepdog.direct_monte_carlo.cost_function_filter
import tantri.cli
import pdme
import pdme.util.fast_v_calc
import pdme.measurement
import pdme.measurement.input_types
_logger = logging.getLogger(__name__)
X_ELECTRIC_FIELD = "Ex"
POTENTIAL = "V"
@dataclasses.dataclass
class Measurement:
dot_measurement: pdme.measurement.DotMeasurement
stdev: float
class CostFunction:
def __init__(self, measurement_type, dot_inputs_array, actual_measurement_array):
_logger.info(f"Cost function with measurement type of {measurement_type}")
self.measurement_type = measurement_type
self.dot_inputs_array = dot_inputs_array
self.actual_measurement_array = actual_measurement_array
self.actual_measurement_array2 = actual_measurement_array**2
def __call__(self, dipoles_to_test):
if self.measurement_type == X_ELECTRIC_FIELD:
vals = pdme.util.fast_v_calc.fast_efieldxs_for_dipoleses(
self.dot_inputs_array, dipoles_to_test
)
		elif self.measurement_type == POTENTIAL:
			vals = pdme.util.fast_v_calc.fast_vs_for_dipoleses(
				self.dot_inputs_array, dipoles_to_test
			)
		else:
			raise ValueError(f"Unknown measurement type {self.measurement_type}")
diffs = (
vals - self.actual_measurement_array
) ** 2 / self.actual_measurement_array2
return numpy.sqrt(diffs.mean(axis=-1))
class StDevUsingCostFunction:
def __init__(
self,
measurement_type,
dot_inputs_array,
actual_measurement_array,
actual_stdev_array,
):
_logger.info(f"Cost function with measurement type of {measurement_type}")
self.measurement_type = measurement_type
self.dot_inputs_array = dot_inputs_array
self.actual_measurement_array = actual_measurement_array
self.actual_measurement_array2 = actual_measurement_array**2
self.actual_stdev_array = actual_stdev_array
self.actual_stdev_array2 = actual_stdev_array**2
def __call__(self, dipoles_to_test):
if self.measurement_type == X_ELECTRIC_FIELD:
vals = pdme.util.fast_v_calc.fast_efieldxs_for_dipoleses(
self.dot_inputs_array, dipoles_to_test
)
		elif self.measurement_type == POTENTIAL:
			vals = pdme.util.fast_v_calc.fast_vs_for_dipoleses(
				self.dot_inputs_array, dipoles_to_test
			)
		else:
			raise ValueError(f"Unknown measurement type {self.measurement_type}")
diffs = ((vals - self.actual_measurement_array) ** 2) / self.actual_stdev_array2
return numpy.sqrt(diffs.mean(axis=-1))
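Both cost functions compute a root-mean-square of normalized squared residuals per candidate dipole configuration; a toy numpy check of the stdev-normalized variant (editorial, bypassing pdme):

```python
import numpy

vals = numpy.array([[1.0, 2.0]])        # one candidate, two measurements
actual = numpy.array([1.5, 1.5])        # measured means
stdev = numpy.array([0.5, 0.5])         # measured standard deviations

diffs = ((vals - actual) ** 2) / stdev**2
print(numpy.sqrt(diffs.mean(axis=-1)))  # [1.]: every residual is one stdev
```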
# the key for frequencies in what we return
RETURNED_FREQUENCIES_KEY = "frequencies"
def read_dots_json(json_file: pathlib.Path) -> typing.Dict:
try:
with open(json_file, "r") as file:
return _reshape_dots_dict(json.load(file))
except Exception as e:
_logger.error(
f"Had a bad time reading the dots file {json_file}, sorry.", exc_info=e
)
raise e
def _reshape_dots_dict(dots_dict: typing.Sequence[typing.Dict]) -> typing.Dict:
ret = {}
for dot in dots_dict:
ret[dot["label"]] = dot["r"]
return ret
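For the `dots.json` in this commit, `_reshape_dots_dict` maps labels to positions; calling the private helper directly, as a sketch:

```python
dots = [{"r": [5, 0, 0], "label": "dot1"}, {"r": [-5, 0, 0], "label": "dot2"}]
print(_reshape_dots_dict(dots))  # {'dot1': [5, 0, 0], 'dot2': [-5, 0, 0]}
```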
BINNED_HEADER_REGEX = r"\s*APSD_(?P<measurement_type>\w+)_(?P<dot_name>\w+)_(?P<summary_stat>mean|stdev)\s*"
@dataclasses.dataclass
class ParsedBinHeader:
original_field: str
measurement_type: str
dot_name: str
summary_stat: str
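A quick check (editorial) of `BINNED_HEADER_REGEX` against a column header of the shape the binned CSVs use; the header string here is illustrative:

```python
import re

match = re.match(BINNED_HEADER_REGEX, "APSD_Ex_dot1_mean")
assert match is not None
print(match.groupdict())
# {'measurement_type': 'Ex', 'dot_name': 'dot1', 'summary_stat': 'mean'}
```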
def read_bin_csv(
csv_file: pathlib.Path,
) -> typing.Tuple[str, typing.Dict[str, typing.Any]]:
measurement_type = None
_logger.info(f"Assuming measurement type is {measurement_type} for now")
try:
with open(csv_file, "r", newline="") as file:
reader = csv.DictReader(file)
fields = reader.fieldnames
if fields is None:
raise ValueError(
f"Really wanted our fields for fiel {file=} to be non-None, but they're None"
)
freq_field = fields[0]
remaining_fields = fields[1:]
_logger.debug(f"Going to read frequencies from {freq_field=}")
parsed_headers = {}
aggregated_dict: typing.Dict[str, typing.Any] = {
RETURNED_FREQUENCIES_KEY: []
}
for field in remaining_fields:
match = re.match(BINNED_HEADER_REGEX, field)
if match is None:
_logger.warning(f"Could not parse {field=}")
continue
match_groups = match.groupdict()
parsed_header = ParsedBinHeader(
field,
match_groups["measurement_type"],
match_groups["dot_name"],
match_groups["summary_stat"],
)
parsed_headers[field] = parsed_header
if parsed_header.dot_name not in aggregated_dict:
aggregated_dict[parsed_header.dot_name] = {}
if (
parsed_header.summary_stat
not in aggregated_dict[parsed_header.dot_name]
):
aggregated_dict[parsed_header.dot_name][
parsed_header.summary_stat
] = []
if measurement_type is not None:
if measurement_type != parsed_header.measurement_type:
_logger.warning(
f"Attempted to set already set measurement type {measurement_type}. Allowing the switch to {parsed_header.measurement_type}, but it's problematic"
)
measurement_type = parsed_header.measurement_type
_logger.debug("finished parsing headers")
_logger.debug("throwing away the measurement type for now")
for row in reader:
# _logger.debug(f"Got {row=}")
aggregated_dict[RETURNED_FREQUENCIES_KEY].append(
float(row[freq_field].strip())
)
for field, parsed_header in parsed_headers.items():
value = float(row[field].strip())
aggregated_dict[parsed_header.dot_name][
parsed_header.summary_stat
].append(value)
if measurement_type is None:
raise ValueError(
f"For some reason {measurement_type=} is None? We want to know our measurement type."
)
return measurement_type, aggregated_dict
except Exception as e:
_logger.error(
f"Had a bad time reading the binned data {csv_file}, sorry.", exc_info=e
)
raise e
@dataclasses.dataclass
class BinnedData:
dots_dict: typing.Dict
csv_dict: typing.Dict[str, typing.Any]
measurement_type: str
# we're ignoring stdevs for the current moment, as in the calculator single_dipole_matches.py script.
def _dot_to_measurement(self, dot_name: str) -> typing.Sequence[Measurement]:
if dot_name not in self.dots_dict:
raise KeyError(f"Could not find {dot_name=} in {self.dots_dict=}")
if dot_name not in self.csv_dict:
raise KeyError(f"Could not find {dot_name=} in {self.csv_dict=}")
dot_r = self.dots_dict[dot_name]
freqs = self.csv_dict[RETURNED_FREQUENCIES_KEY]
vs = self.csv_dict[dot_name]["mean"]
stdevs = self.csv_dict[dot_name]["stdev"]
return [
Measurement(
dot_measurement=pdme.measurement.DotMeasurement(f=f, v=v, r=dot_r),
stdev=stdev,
)
for f, v, stdev in zip(freqs, vs, stdevs)
]
def _dot_to_stdev(self, dot_name: str) -> typing.Sequence[float]:
if dot_name not in self.dots_dict:
raise KeyError(f"Could not find {dot_name=} in {self.dots_dict=}")
if dot_name not in self.csv_dict:
raise KeyError(f"Could not find {dot_name=} in {self.csv_dict=}")
stdevs = self.csv_dict[dot_name]["stdev"]
return stdevs
def measurements(
self, dot_names: typing.Sequence[str]
) -> typing.Sequence[Measurement]:
_logger.debug(f"Constructing measurements for dots {dot_names=}")
ret = []
for dot_name in dot_names:
ret.extend(self._dot_to_measurement(dot_name))
return ret
def _cost_function(self, measurements: typing.Sequence[Measurement]):
dot_measurements = [m.dot_measurement for m in measurements]
meas_array = numpy.array([m.v for m in dot_measurements])
_logger.debug(f"Obtained {meas_array=}")
inputs = [(m.dot_measurement.r, m.dot_measurement.f) for m in measurements]
input_array = pdme.measurement.input_types.dot_inputs_to_array(inputs)
_logger.debug(f"Obtained {input_array=}")
return CostFunction(self.measurement_type, input_array, meas_array)
def _stdev_cost_function(
self,
measurements: typing.Sequence[Measurement],
):
meas_array = numpy.array([m.dot_measurement.v for m in measurements])
stdev_array = numpy.array([m.stdev for m in measurements])
_logger.debug(f"Obtained {meas_array=}")
inputs = [(m.dot_measurement.r, m.dot_measurement.f) for m in measurements]
input_array = pdme.measurement.input_types.dot_inputs_to_array(inputs)
_logger.debug(f"Obtained {input_array=}")
return StDevUsingCostFunction(
self.measurement_type, input_array, meas_array, stdev_array
)
def cost_function_filter(self, dot_names: typing.Sequence[str], target_cost: float):
measurements = self.measurements(dot_names)
cost_function = self._cost_function(measurements)
return deepdog.direct_monte_carlo.cost_function_filter.CostFunctionTargetFilter(
cost_function, target_cost
)
def stdev_cost_function_filter(
self, dot_names: typing.Sequence[str], target_cost: float
):
measurements = self.measurements(dot_names)
cost_function = self._stdev_cost_function(measurements)
return deepdog.direct_monte_carlo.cost_function_filter.CostFunctionTargetFilter(
cost_function, target_cost
)
def read_dots_and_binned(json_file: pathlib.Path, csv_file: pathlib.Path) -> BinnedData:
dots = read_dots_json(json_file)
measurement_type, binned = read_bin_csv(csv_file)
return BinnedData(
measurement_type=measurement_type, dots_dict=dots, csv_dict=binned
)
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG)
print(read_dots_json(pathlib.Path("dots.json")))
# print(read_bin_csv(pathlib.Path("binned-0.01-10000-50-12345.csv")))
binned_data = read_dots_and_binned(
pathlib.Path("dots.json"), pathlib.Path("binned-0.01-10000-50-12345.csv")
)
_logger.info(binned_data)
for entry in binned_data.measurements(["uprise1", "dot1"]):
_logger.info(entry)
filter = binned_data.cost_function_filter(["uprise1", "dot1"], 0.5)
_logger.info(filter)

kalpa/stages/__init__.py Executable file

@@ -0,0 +1,91 @@
import logging
import single_dipole_4.stages.stage01
import single_dipole_4.stages.stage02
import single_dipole_4.stages.stage03
import single_dipole_4.stages.stage04
import single_dipole_4.common
import tantri.dipoles.types
import argparse
_logger = logging.getLogger(__name__)
class Runner:
def __init__(self, config: single_dipole_4.Config):
self.config = config
_logger.info(f"Initialising runner with {config=}")
def run(self):
_logger.info("*** Beginning Stage 01 ***")
stage01 = single_dipole_4.stages.stage01.Stage01Runner(self.config)
stage01.run()
_logger.info("*** Beginning Stage 02 ***")
stage02 = single_dipole_4.stages.stage02.Stage02Runner(self.config)
stage02.run()
_logger.info("*** Beginning Stage 03 ***")
stage03 = single_dipole_4.stages.stage03.Stage03Runner(self.config)
stage03.run()
_logger.info("*** Beginning Stage 04 ***")
stage04 = single_dipole_4.stages.stage04.Stage04Runner(self.config)
stage04.run()
def parse_args():
parser = argparse.ArgumentParser(
"Multistage Runner", formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument(
"--log-file",
type=str,
help="A filename for logging to, if not provided will only log to stderr",
default=None,
)
args = parser.parse_args()
return args
def main():
args = parse_args()
tantri_configs = [
single_dipole_4.TantriConfig(12345, 50, 0.5, 100000),
# single_dipole_4.TantriConfig(1234, 50, 0.0005, 10000),
]
generation_config = single_dipole_4.GenerationConfig(
tantri_configs=tantri_configs,
counts=[1],
num_replicas=3,
orientations=[tantri.dipoles.types.Orientation.XY],
num_bin_time_series=25,
)
general_config = single_dipole_4.GeneralConfig(
measurement_type=single_dipole_4.MeasurementTypeEnum.POTENTIAL
)
deepdog_config = single_dipole_4.DeepdogConfig(
costs_to_try=[10, 2, 1, 0.1],
max_monte_carlo_cycles_steps=20,
)
config = single_dipole_4.Config(
generation_config=generation_config,
general_config=general_config,
deepdog_config=deepdog_config,
)
single_dipole_4.common.set_up_logging(config, args.log_file)
_logger.info(f"Got {config=}")
runner = Runner(config)
runner.run()
if __name__ == "__main__":
main()

kalpa/stages/stage01.py Executable file

@@ -0,0 +1,234 @@
#! /usr/bin/env poetry run python
import json
import pathlib
import argparse
import logging
import single_dipole_4
import single_dipole_4.common
import tantri.cli
import tantri.dipoles.types
_logger = logging.getLogger(__name__)
# constants
# DOTS DOTS DOTS DOTS DOTS
# DOTS = "dots.json"
# POTENTIAL = "electric-potential"
# X_ELECTRIC_FIELD = "x-electric-field"
# LOG_PATTERN = "%(asctime)s | %(levelname)-7s | %(name)s:%(lineno)d | %(message)s"
# OUT_DIR = "out"
# parameters for iteration
# TODO Consider how these interact with indexes.json
# COUNTS = [1, 10]
# ORIENTATIONS = ["XY", "RANDOM", "Z"]
# NUM_REPLICAS = 3
# config type params, should be logged!
# INDEX_STARTER = 3141
# NUM_SEEDS = 100
# these are obviously not independent but it's just easier than thinking about floats to define them both here
# DELTA_T = 0.05
# SAMPLE_RATE = 10
# NUM_ITERATIONS = 100000  # for the time series, how many steps
# for binning
# NUM_BIN_TS = 25
# BIN_WIDTH_LOG = 0.25
# def get_config(count, orientation, seed):
# output_dict = {
# "x_min": -20,
# "x_max": 20,
# "y_min": -10,
# "y_max": 10,
# "z_min": 0,
# "z_max": 5,
# "mag": 100,
# "w_log_min": -4,
# "w_log_max": 1,
# "orientation": orientation,
# "dipole_count": count,
# "generation_seed": seed
# }
# return output_dict
# def set_up_logging(log_file):
# if log_file is None:
# handlers = [
# logging.StreamHandler(),
# ]
# else:
# handlers = [
# logging.StreamHandler(),
# logging.FileHandler(log_file)
# ]
# logging.basicConfig(
# level=logging.DEBUG,
# format = LOG_PATTERN,
# handlers=handlers,
# )
# logging.getLogger("pdme").setLevel(logging.ERROR)
# logging.captureWarnings(True)
class Stage01Runner:
def __init__(self, config: single_dipole_4.Config):
self.config = config
_logger.info(f"Initialising Stage01 runner with {config=}")
def generate_single_subdir(
self, seed: int, count: int, orientation: str, replica: int
):
"""
		Create a directory and populate it with the generation config, dipoles, and tantri time-series outputs.
"""
_logger.info(
f"Generating config for {seed=} {count=} {orientation=} {replica=}"
)
out = self.config.get_out_dir_path()
directory = out / f"{orientation.lower()}-{count}-{replica}"
directory.mkdir(parents=True, exist_ok=True)
config_json = directory / "generation_config.json"
dipoles_json = directory / "dipoles.json"
with open(config_json, "w") as conf_file:
params = single_dipole_4.ReducedModelParams(
count=count, orientation=tantri.dipoles.types.Orientation(orientation)
)
_logger.debug(f"Got params {params=}")
json.dump(params.config_dict(seed), conf_file)
# json.dump(single_dipole_4.common.model_config_dict(count, orientation, seed), conf_file)
tantri.cli._generate_dipoles(config_json, dipoles_json, (seed, replica, 1))
# tantri.cli._write_apsd(dipoles_json, DOTS, X_ELECTRIC_FIELD, DELTA_T, NUM_ITERATIONS, NUM_BIN_TS, (index, replica, 2), output_csv, binned_csv, BIN_WIDTH_LOG, True)
for tantri_index, tantri_config in enumerate(
self.config.generation_config.tantri_configs
):
output_csv = directory / single_dipole_4.common.tantri_full_output_name(
tantri_index
)
binned_csv = directory / single_dipole_4.common.tantri_binned_output_name(
tantri_index
)
tantri.cli._write_apsd(
dipoles_json,
self.config.general_config.dots_json_name,
self.config.general_config.measurement_type.value,
tantri_config.delta_t,
tantri_config.num_iterations,
self.config.generation_config.num_bin_time_series,
(seed, replica, 2),
output_csv,
binned_csv,
self.config.generation_config.bin_log_width,
True,
)
def run(self):
seed_index = 0
for count in self.config.generation_config.counts:
for orientation in self.config.generation_config.orientations:
for replica in range(self.config.generation_config.num_replicas):
_logger.info(
f"Generating for {seed_index=}: [{count=}, {orientation=}, {replica=}"
)
self.generate_single_subdir(seed_index, count, orientation, replica)
seed_index += 1
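The seed index simply enumerates the (count, orientation, replica) product in a fixed order, so regenerating with the same config reproduces the same seeds; an equivalent editorial sketch:

```python
import itertools

counts, orientations, replicas = [1], ["Z"], range(3)
for seed_index, (count, orientation, replica) in enumerate(
    itertools.product(counts, orientations, replicas)
):
    print(seed_index, count, orientation, replica)  # seed_index runs 0..2 here
```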
def parse_args():
parser = argparse.ArgumentParser(
"Single dipole 4 config maker",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
"--log-file",
type=str,
help="A filename for logging to, if not provided will only log to stderr",
default=None,
)
args = parser.parse_args()
return args
# def gen_config(index: int, count: int, orientation: str, replica: int):
# """
# create a directory, populate it with stuff.
# """
# _logger.info(f"Generating config for {index=} {count=} {orientation=} {replica=}")
# out = pathlib.Path(OUT_DIR)
# directory = out / f"{orientation.lower()}-{count}-{replica}"
# directory.mkdir(parents=True, exist_ok=True)
# config_json = directory/"generation_config.json"
# dipoles_json = directory/"dipoles.json"
# output_csv = directory/"apsd.csv"
# binned_csv = directory/"binned_apsd.csv"
# with open(config_json, "w") as conf_file:
# json.dump(get_config(count, orientation, index), conf_file)
# tantri.cli._generate_dipoles(config_json, dipoles_json, (index, replica, 1))
# tantri.cli._write_apsd(dipoles_json, DOTS, X_ELECTRIC_FIELD, DELTA_T, NUM_ITERATIONS, NUM_BIN_TS, (index, replica, 2), output_csv, binned_csv, BIN_WIDTH_LOG, True)
def main():
args = parse_args()
tantri_configs = [
single_dipole_4.TantriConfig(31415, 100, 5, 100000),
single_dipole_4.TantriConfig(314, 100, 0.00005, 100000),
]
generation_config = single_dipole_4.GenerationConfig(
tantri_configs=tantri_configs,
counts=[1],
num_replicas=3,
orientations=[tantri.dipoles.types.Orientation.Z],
)
config = single_dipole_4.Config(generation_config=generation_config)
single_dipole_4.common.set_up_logging(config, args.log_file)
_logger.info("Generating our data, for the following iterations")
_logger.info(config)
# _logger.info(f"{COUNTS=}")
# _logger.info(f"{ORIENTATIONS=}")
# _logger.info(f"{NUM_REPLICAS=}")
# _logger.info("Our parameters used: ")
# _logger.info(f"\t{INDEX_STARTER=}")
# _logger.info(f"\t{NUM_SEEDS=}")
# # these are obviously not independent but it's just easier than thinking about floats to define them both here
# _logger.info(f"\t{DELTA_T=}")
# _logger.info(f"\t{SAMPLE_RATE=}")
# _logger.info(f"\t{NUM_ITERATIONS=}")
	# # for binning
# _logger.info(f"\t{NUM_BIN_TS=}")
# _logger.info(f"\t{BIN_WIDTH_LOG=}")
runner = Stage01Runner(config)
runner.run()
if __name__ == "__main__":
main()

kalpa/stages/stage02.py Executable file

@@ -0,0 +1,282 @@
import argparse
import pathlib
# import csv
import numpy
import deepdog
import deepdog.direct_monte_carlo.compose_filter
import deepdog.indexify
import deepdog.direct_monte_carlo
import logging
# # import itertools
import pdme
import tantri.cli
import tantri.cli.file_importer
import tantri.dipoles.types
import typing
# import pdme.measurement
#
# # import random
# # import sys
# # import re
# # import math
# # import os
import json
# # from dataclasses import dataclass
#
#
# folder in curr dir
import single_dipole_4
import single_dipole_4.common
_logger = logging.getLogger(__name__)
# LOG_PATTERN = "%(asctime)s | %(levelname)-7s | %(name)s:%(lineno)d | %(message)s"
# JOBS = list(range(18))
# TODO move to json file and read
# COSTS = [10.0, 5.0, 1.0, 0.5, 0.1, 0.06]
# DOTS_DICT = {
# "dot1": "dot1",
# "dot2": "dot1,dot2",
# "line": "dot1,dot2,line",
# "triangle1": "dot1,dot2,triangle1",
# "triangle2": "dot1,dot2,triangle2",
# "uprise1": "dot1,dot2,uprise1",
# "uprise2": "dot1,dot2,uprise2",
# }
def enumify_orientation_string(
orientation_string: str,
) -> tantri.dipoles.types.Orientation:
canonical_orientation_string = orientation_string.upper()
if canonical_orientation_string in ["FIXEDZ", "Z"]:
return tantri.dipoles.types.Orientation.Z
if canonical_orientation_string in ["FIXEDXY", "XY"]:
return tantri.dipoles.types.Orientation.XY
if canonical_orientation_string in ["FREE", "RANDOM"]:
return tantri.dipoles.types.Orientation.RANDOM
else:
raise ValueError(
f"Could not find match for orientation_string {orientation_string=}"
)
class Stage02Runner:
def __init__(self, config: single_dipole_4.Config):
self.config = config
_logger.info(f"Initialising Stage02 runner with {config=}")
with config.absify(config.general_config.indexes_json_name).open(
"r"
) as indexify_json_file:
indexify_spec = json.load(indexify_json_file)
indexify_data = indexify_spec["indexes"]
if "seed_spec" in indexify_spec:
seed_spec = indexify_spec["seed_spec"]
indexify_data[seed_spec["field_name"]] = list(
range(seed_spec["num_seeds"])
)
_logger.info(f"loading indexifier with data {indexify_data=}")
self.indexifier = deepdog.indexify.Indexifier(indexify_data)
self.dots = tantri.cli.file_importer.read_dots_json_file(
self.config.absify(self.config.general_config.dots_json_name)
)
_logger.info(f"Got dots {self.dots=}")
def _dots_to_include(self, current_dot: str) -> typing.Sequence[str]:
if current_dot == "dot1":
return ["dot1"]
if current_dot == "dot2":
return ["dot1", "dot2"]
else:
return ["dot1", "dot2", current_dot]
def run_in_subdir(self, subdir: pathlib.Path):
with single_dipole_4.common.new_cd(subdir):
_logger.debug(f"Running inside {subdir=}")
# TODO hardcoding that we're executing every job index.
num_jobs = len(self.indexifier)
_logger.debug(f"Have {num_jobs=}")
seed_index = 0
for job_index in range(num_jobs):
for cost in self.config.deepdog_config.costs_to_try:
for dot in self.dots:
seed_index += 1
combined_dot_name = ",".join(
[d for d in self._dots_to_include(dot.label)]
)
trial_name = (
f"{dot.label}-{combined_dot_name}-{cost}-{job_index}"
)
_logger.info(f"Working on {trial_name=}")
_logger.debug(f"Have {seed_index=}")
self.single_run_in_subdir(
job_index, cost, dot.label, trial_name, seed_index
)
def single_run_in_subdir(
self,
job_index: int,
cost: float,
dot_name: str,
trial_name: str,
seed_index: int,
):
# _logger.info(f"Got job index {job_index}")
# NOTE This guy runs inside subdirs, obviously. In something like <single_dipole_4>/out/z-10-2/dipoles
# we had job_index, trial_name, args let's see what we need
_logger.debug(
f"run_analysis() being called with ({job_index=}, {cost=}, {dot_name=}, {trial_name=}, {seed_index=})"
)
_logger.info(
f"Have {self.config.generation_config.tantri_configs} as our tantri_configs"
)
num_tantri_configs = len(self.config.generation_config.tantri_configs)
binned_datas = [
single_dipole_4.read_dots_and_binned(
self.config.get_dots_json_path(),
pathlib.Path("..")
/ single_dipole_4.common.tantri_binned_output_name(tantri_index),
)
for tantri_index in range(num_tantri_configs)
]
dot_names = self._dots_to_include(dot_name)
_logger.debug(f"Got dot names {dot_names}")
models = []
indexes = self.indexifier.indexify(job_index)
_logger.debug(f"indexes are {indexes}")
log_magnitude = indexes["magnitudes"]
avg_filled = indexes["occupancies"]
orientation = enumify_orientation_string(indexes["orientations"])
# we are just finding matches given a single seed so don't need to change this
seed = seed_index
# TODO find way to store this as a global config file
occupancies_dict = {
1: (1000, 1000),
10: (1000, 100),
16: (10000, 10),
31: (1000, 100),
56: (1000, 100),
100: (100, 100),
}
mccount, mccountcycles = occupancies_dict[avg_filled]
model_params = single_dipole_4.ReducedModelParams(
count=avg_filled, log_magnitude=log_magnitude, orientation=orientation
)
models.append(single_dipole_4.get_model(model_params))
_logger.info(f"have {len(models)} models to look at")
if len(models) == 1:
_logger.info(f"only one model, name: {models[0][0]}")
deepdog_config = deepdog.direct_monte_carlo.DirectMonteCarloConfig(
monte_carlo_count_per_cycle=mccount,
monte_carlo_cycles=mccountcycles,
target_success=self.config.deepdog_config.target_success,
max_monte_carlo_cycles_steps=self.config.deepdog_config.max_monte_carlo_cycles_steps,
monte_carlo_seed=seed,
write_successes_to_file=True,
tag=trial_name,
write_bayesrun_file=True,
)
_logger.info(f"{deepdog_config=}")
stdev_cost_function_filters = [
b.stdev_cost_function_filter(dot_names, cost) for b in binned_datas
]
_logger.debug(f"{stdev_cost_function_filters=}")
combining_filter = deepdog.direct_monte_carlo.compose_filter.ComposedDMCFilter(
stdev_cost_function_filters
)
run = deepdog.direct_monte_carlo.DirectMonteCarloRun(
model_name_pairs=models,
filter=combining_filter,
config=deepdog_config,
)
results = run.execute()
_logger.info(results)
def run(self):
"""Going to iterate over every folder in out_dir, and execute the subdir stuff inside dirs like <single_dipole_4>/out/z-10-2/dipoles"""
out_dir_path = self.config.get_out_dir_path()
		subdirs = [child for child in out_dir_path.iterdir() if child.is_dir()]
# _logger.info(f"Going to execute within each of the directories in {subdirs=}")
for subdir in subdirs:
# skip try finally for now just blow up if problem
_logger.debug(f"Running for {subdir=}")
dipoles_dir = subdir / "dipoles"
dipoles_dir.mkdir(exist_ok=True, parents=False)
self.run_in_subdir(subdir / "dipoles")
def parse_args():
parser = argparse.ArgumentParser(
"Stage02 Runner", formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument(
"--log-file",
type=str,
help="A filename for logging to, if not provided will only log to stderr",
default=None,
)
args = parser.parse_args()
return args
def main():
args = parse_args()
tantri_configs = [
single_dipole_4.TantriConfig(31415, 100, 5, 100000),
single_dipole_4.TantriConfig(314, 100, 0.00005, 100000),
]
generation_config = single_dipole_4.GenerationConfig(tantri_configs=tantri_configs)
config = single_dipole_4.Config(generation_config=generation_config)
single_dipole_4.common.set_up_logging(config, args.log_file)
_logger.info("Generating our data, for the following iterations")
_logger.info(config)
runner = Stage02Runner(config)
runner.run()
if __name__ == "__main__":
main()

kalpa/stages/stage03.py Executable file

@@ -0,0 +1,196 @@
import argparse
import pathlib
# import csv
import numpy
import csv
import deepdog
import deepdog.cli.probs
import deepdog.cli.probs.main
import deepdog.direct_monte_carlo.compose_filter
import deepdog.indexify
import deepdog.direct_monte_carlo
import logging
# # import itertools
import pdme
import single_dipole_4.stages
import single_dipole_4.stages.stage03_1
import tantri.cli
import tantri.cli.file_importer
import tantri.dipoles.types
# # from dataclasses import dataclass
#
#
# folder in curr dir
import single_dipole_4
import single_dipole_4.common
_logger = logging.getLogger(__name__)
OUT_FIELDNAMES = [
"dot_name",
"target_cost",
"xmin",
"xmax",
"ymin",
"ymax",
"zmin",
"zmax",
"orientation",
"avg_filled",
"log_magnitude",
"calculations_coalesced",
"success",
"count",
"prob",
]
def coalesced_filename(dot_name, target_cost) -> str:
return f"coalesced-{dot_name}-{target_cost}.csv"
def read_coalesced_csv(parent_path: pathlib.Path, dot_name: str, target_cost):
# csv_name = f"coalesced-{dot_name}-{target_cost}.csv"
csv_path = parent_path / coalesced_filename(dot_name, target_cost)
_logger.debug(f"{csv_path=}")
with csv_path.open("r", newline="") as csvfile:
reader = csv.DictReader(csvfile)
out_list = []
for row in reader:
row["dot_name"] = dot_name
row["target_cost"] = target_cost
out_list.append(row)
return out_list
class Stage03Runner:
def __init__(self, config: single_dipole_4.Config):
self.config = config
_logger.info(f"Initialising Stage03 runner with {config=}")
self.indexifier = self.config.indexifier()
self.dots = [
d.label
for d in tantri.cli.file_importer.read_dots_json_file(
self.config.absify(self.config.general_config.dots_json_name)
)
]
_logger.info(f"Got dots {self.dots=}")
def merge_coalesceds(self, sorted_dir: pathlib.Path):
out_path = sorted_dir / single_dipole_4.common.merged_coalesced_name()
with out_path.open("w", newline="") as outfile:
writer = csv.DictWriter(outfile, OUT_FIELDNAMES)
writer.writeheader()
for dot in self.dots:
for cost in self.config.deepdog_config.costs_to_try:
_logger.info(f"Reading {dot=} {cost=}")
rows = read_coalesced_csv(sorted_dir, dot, cost)
for row in rows:
writer.writerow(row)
def run_in_subdir(self, subdir: pathlib.Path):
"""
Subdir passed in should be e.g. <>/out/z-10-1/
"""
with single_dipole_4.common.new_cd(subdir):
_logger.debug(f"Running inside {subdir=}")
single_dipole_4.stages.stage03_1.move_all_in_dipoles(subdir / "dipoles")
seed_index = 0
sorted_dir = pathlib.Path(single_dipole_4.common.sorted_bayesruns_name())
_logger.info(f"{sorted_dir.resolve()}")
for cost in self.config.deepdog_config.costs_to_try:
for dot in self.dots:
seed_index += 1
# TODO pull out
sorted_subdir = sorted_dir / f"{dot}-{cost}"
# TODO need to refactor deepdog probs method so I don't have to dump into args like this
probs_args = argparse.Namespace()
probs_args.bayesrun_directory = sorted_subdir
probs_args.indexify_json = self.config.absify(
self.config.general_config.indexes_json_name
)
probs_args.coalesced_keys = ""
probs_args.uncoalesced_outfile = None
probs_args.coalesced_outfile = sorted_dir / coalesced_filename(
dot, cost
)
deepdog.cli.probs.main.main(probs_args)
self.merge_coalesceds(sorted_dir)
# def run_in_subdir(self, subdir: pathlib.Path):
#
def run(self):
"""Going to iterate over every folder in out_dir, and execute the subdir stuff inside dirs like <single_dipole_4>/out/z-10-2/dipoles"""
out_dir_path = self.config.get_out_dir_path()
		subdirs = [child for child in out_dir_path.iterdir() if child.is_dir()]
# _logger.info(f"Going to execute within each of the directories in {subdirs=}")
for subdir in subdirs:
# skip try finally for now just blow up if problem
_logger.debug(f"Running for {subdir=}")
dipoles_dir = subdir / "dipoles"
dipoles_dir.mkdir(exist_ok=True, parents=False)
self.run_in_subdir(subdir)
def parse_args():
parser = argparse.ArgumentParser(
"Stage03 Runner", formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument(
"--log-file",
type=str,
help="A filename for logging to, if not provided will only log to stderr",
default=None,
)
args = parser.parse_args()
return args
def main():
args = parse_args()
tantri_configs = [
single_dipole_4.TantriConfig(31415, 100, 5, 100000),
single_dipole_4.TantriConfig(314, 100, 0.00005, 100000),
]
generation_config = single_dipole_4.GenerationConfig(
tantri_configs=tantri_configs,
counts=[1],
num_replicas=3,
orientations=[tantri.dipoles.types.Orientation.Z],
)
config = single_dipole_4.Config(generation_config=generation_config)
single_dipole_4.common.set_up_logging(config, args.log_file)
_logger.info("Generating our data, for the following iterations")
_logger.info(config)
runner = Stage03Runner(config)
runner.run()
if __name__ == "__main__":
main()

kalpa/stages/stage03_1.py Executable file

@@ -0,0 +1,62 @@
import single_dipole_4.common
import os
import logging
import argparse
import deepdog.results
import pathlib
from dataclasses import dataclass
_logger = logging.getLogger(__name__)
@dataclass
class BFile:
file: pathlib.Path
target_dirname: str
def target_dir(filename) -> BFile:
fileresult = deepdog.results._parse_output_filename(
pathlib.Path("dipoles") / filename
)
_logger.debug(f"Parsed {filename=} to {fileresult=}")
parsed_slug = deepdog.results.parse_file_slug(fileresult.filename_slug)
_logger.debug(f"{parsed_slug=}")
if parsed_slug is None:
raise ValueError(f"Could not parse {filename=}")
tag = parsed_slug["tag"]
cost = parsed_slug["target_cost"]
target_dirname = f"{single_dipole_4.common.sorted_bayesruns_name()}/{tag}-{cost}"
file = fileresult.path
bfile = BFile(file=file, target_dirname=target_dirname)
_logger.info(f"For {filename=} got {bfile=}")
return bfile
def move_file(bfile: BFile):
name = bfile.file.name
_logger.debug(f"Moving {bfile=}")
target_dirpath = pathlib.Path(bfile.target_dirname)
target_dirpath.mkdir(parents=True, exist_ok=True)
bfile.file.rename(target_dirpath / name)
def move_all_in_dipoles(dipoles_path: pathlib.Path):
_logger.info(f"Going to try to move files in {dipoles_path=}")
sorted_dir = pathlib.Path(single_dipole_4.common.sorted_bayesruns_name())
sorted_dir.mkdir(exist_ok=True, parents=True)
bayesruns = [
target_dir(f) for f in dipoles_path.iterdir() if f.name.endswith("bayesrun.csv")
]
_logger.debug([f.name for f in dipoles_path.iterdir()])
_logger.debug(f"{bayesruns=}")
for bfile in bayesruns:
_logger.debug(f"Moving {bfile=}")
move_file(bfile)

kalpa/stages/stage04.py Executable file

@@ -0,0 +1,217 @@
import typing
import logging
import argparse
import csv
import single_dipole_4
import single_dipole_4.common
import single_dipole_4.inference_coalesce
import tantri.dipoles.types
_logger = logging.getLogger(__name__)
ORIENTATION_DICT = {
"XY": "fixedxy",
"RANDOM": "free",
"Z": "fixedz",
}
# could be detected but why not just hardcode
MERGED_OUT_FIELDNAMES = [
"subdir_name",
"actual_orientation",
"actual_avg_filled",
"generation_replica_index",
"is_row_actual",
# old fields
"dot_name",
"target_cost",
"xmin",
"xmax",
"ymin",
"ymax",
"zmin",
"zmax",
"orientation",
"avg_filled",
"log_magnitude",
"calculations_coalesced",
"success",
"count",
"prob",
]
INFERENCED_OUT_FIELDNAMES = [
"subdir_name",
"actual_orientation",
"actual_avg_filled",
"generation_replica_index",
"is_row_actual",
# old fields
"dot_name",
"target_cost",
"xmin",
"xmax",
"ymin",
"ymax",
"zmin",
"zmax",
"orientation",
"avg_filled",
"log_magnitude",
"calculations_coalesced",
"success",
"count",
"prob",
"coalesced_prob",
]
def is_actual(row, actual_normal_orientation, actual_count):
_logger.debug(f"Check orientations")
row_or = row["orientation"]
_logger.debug(f"row: {row_or}, actual: {actual_normal_orientation}")
is_or = row_or == actual_normal_orientation
_logger.debug(f"Check counts")
row_count = row["avg_filled"]
_logger.debug(f"row: {row_count}, actual: {actual_count}")
is_count = int(row_count) == int(actual_count)
_logger.debug(f"Check magnitude")
row_logmag = row["log_magnitude"]
# TODO hardcoding
is_mag = int(row_logmag) == 2
_logger.debug(f"{is_or=} and {is_count=}and {is_mag=}")
if is_or and is_count and is_mag:
_logger.debug("Returning 1")
return 1
else:
_logger.debug("Returning 0")
return 0
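A toy check of `is_actual` (editorial); the row dict mimics a merged CSV row, with illustrative values:

```python
row = {"orientation": "fixedz", "avg_filled": "1", "log_magnitude": "2"}
print(is_actual(row, "fixedz", 1))   # 1: orientation, count, and magnitude match
print(is_actual(row, "fixedxy", 1))  # 0: orientation differs
```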
class Stage04Runner:
def __init__(self, config: single_dipole_4.Config):
self.config = config
_logger.info(f"Initialising Stage04 runner with {config=}")
self.indexifier = self.config.indexifier()
def read_merged_coalesced_csv(self, orientation, count, replica) -> typing.Sequence:
subdir_name = f"{orientation.lower()}-{count}-{replica}"
subdir_path = self.config.get_out_dir_path() / subdir_name
csv_path = (
subdir_path
/ single_dipole_4.common.sorted_bayesruns_name()
/ single_dipole_4.common.merged_coalesced_name()
)
_logger.debug(f"Reading {csv_path=}")
with csv_path.open(mode="r", newline="") as csvfile:
reader = csv.DictReader(csvfile)
out_list = []
for row in reader:
normal_orientation = ORIENTATION_DICT[orientation]
row["subdir_name"] = subdir_name
row["actual_orientation"] = ORIENTATION_DICT[orientation]
row["actual_avg_filled"] = count
row["generation_replica_index"] = replica
row["is_row_actual"] = is_actual(row, normal_orientation, count)
out_list.append(row)
return out_list
def run(self):
megamerged_path = (
self.config.get_out_dir_path() / self.config.general_config.mega_merged_name
)
# normal merged
with megamerged_path.open(mode="w", newline="") as outfile:
writer = csv.DictWriter(outfile, MERGED_OUT_FIELDNAMES)
writer.writeheader()
for count in self.config.generation_config.counts:
for orientation in self.config.generation_config.orientations:
for replica in range(self.config.generation_config.num_replicas):
_logger.info(f"Reading {count=} {orientation=} {replica=}")
rows = self.read_merged_coalesced_csv(
orientation, count, replica
)
for row in rows:
writer.writerow(row)
# merge with inference
with megamerged_path.open(mode="r", newline="") as infile:
# Note that if you pass in fieldnames to a DictReader it doesn't skip. So this is bad:
# megamerged_reader = csv.DictReader(infile, fieldnames=MERGED_OUT_FIELDNAMES)
megamerged_reader = csv.DictReader(infile)
rows = [row for row in megamerged_reader]
_logger.debug(rows[0])
coalescer = single_dipole_4.inference_coalesce.Coalescer(
rows, num_replicas=self.config.generation_config.num_replicas
)
_logger.info(coalescer.actual_dict.keys())
# coalescer.coalesce_generations(("fixedxy", "1"), "dot1")
coalesced = coalescer.coalesce_all()
inferenced_path = (
self.config.get_out_dir_path()
/ self.config.general_config.mega_merged_inferenced_name
)
with inferenced_path.open(mode="w", newline="") as outfile:
writer = csv.DictWriter(outfile, fieldnames=INFERENCED_OUT_FIELDNAMES)
writer.writeheader()
for val in coalesced.values():
for dots in val.values():
for generation in dots.values():
for row in generation.values():
writer.writerow(row)
def parse_args():
parser = argparse.ArgumentParser(
"put files in directory", formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument(
"--log-file",
type=str,
help="A filename for logging to, if not provided will only log to stderr",
default=None,
)
args = parser.parse_args()
return args
def main():
args = parse_args()
tantri_configs = [
single_dipole_4.TantriConfig(31415, 100, 5, 100000),
single_dipole_4.TantriConfig(314, 100, 0.00005, 100000),
]
generation_config = single_dipole_4.GenerationConfig(
tantri_configs=tantri_configs,
counts=[1],
num_replicas=3,
orientations=[tantri.dipoles.types.Orientation.Z],
)
config = single_dipole_4.Config(generation_config=generation_config)
single_dipole_4.common.set_up_logging(config, args.log_file)
_logger.info("Generating our data, for the following iterations")
_logger.info(config)
runner = Stage04Runner(config)
runner.run()
if __name__ == "__main__":
main()

poetry.lock generated Executable file

@@ -0,0 +1,287 @@
# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
[[package]]
name = "black"
version = "24.8.0"
description = "The uncompromising code formatter."
optional = false
python-versions = ">=3.8"
files = [
{file = "black-24.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:09cdeb74d494ec023ded657f7092ba518e8cf78fa8386155e4a03fdcc44679e6"},
{file = "black-24.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:81c6742da39f33b08e791da38410f32e27d632260e599df7245cccee2064afeb"},
{file = "black-24.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:707a1ca89221bc8a1a64fb5e15ef39cd755633daa672a9db7498d1c19de66a42"},
{file = "black-24.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:d6417535d99c37cee4091a2f24eb2b6d5ec42b144d50f1f2e436d9fe1916fe1a"},
{file = "black-24.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fb6e2c0b86bbd43dee042e48059c9ad7830abd5c94b0bc518c0eeec57c3eddc1"},
{file = "black-24.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:837fd281f1908d0076844bc2b801ad2d369c78c45cf800cad7b61686051041af"},
{file = "black-24.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:62e8730977f0b77998029da7971fa896ceefa2c4c4933fcd593fa599ecbf97a4"},
{file = "black-24.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:72901b4913cbac8972ad911dc4098d5753704d1f3c56e44ae8dce99eecb0e3af"},
{file = "black-24.8.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:7c046c1d1eeb7aea9335da62472481d3bbf3fd986e093cffd35f4385c94ae368"},
{file = "black-24.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:649f6d84ccbae73ab767e206772cc2d7a393a001070a4c814a546afd0d423aed"},
{file = "black-24.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2b59b250fdba5f9a9cd9d0ece6e6d993d91ce877d121d161e4698af3eb9c1018"},
{file = "black-24.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:6e55d30d44bed36593c3163b9bc63bf58b3b30e4611e4d88a0c3c239930ed5b2"},
{file = "black-24.8.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:505289f17ceda596658ae81b61ebbe2d9b25aa78067035184ed0a9d855d18afd"},
{file = "black-24.8.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b19c9ad992c7883ad84c9b22aaa73562a16b819c1d8db7a1a1a49fb7ec13c7d2"},
{file = "black-24.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1f13f7f386f86f8121d76599114bb8c17b69d962137fc70efe56137727c7047e"},
{file = "black-24.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:f490dbd59680d809ca31efdae20e634f3fae27fba3ce0ba3208333b713bc3920"},
{file = "black-24.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eab4dd44ce80dea27dc69db40dab62d4ca96112f87996bca68cd75639aeb2e4c"},
{file = "black-24.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3c4285573d4897a7610054af5a890bde7c65cb466040c5f0c8b732812d7f0e5e"},
{file = "black-24.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e84e33b37be070ba135176c123ae52a51f82306def9f7d063ee302ecab2cf47"},
{file = "black-24.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:73bbf84ed136e45d451a260c6b73ed674652f90a2b3211d6a35e78054563a9bb"},
{file = "black-24.8.0-py3-none-any.whl", hash = "sha256:972085c618ee94f402da1af548a4f218c754ea7e5dc70acb168bfaca4c2542ed"},
{file = "black-24.8.0.tar.gz", hash = "sha256:2500945420b6784c38b9ee885af039f5e7471ef284ab03fa35ecdde4688cd83f"},
]
[package.dependencies]
click = ">=8.0.0"
mypy-extensions = ">=0.4.3"
packaging = ">=22.0"
pathspec = ">=0.9.0"
platformdirs = ">=2"
tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""}
[package.extras]
colorama = ["colorama (>=0.4.3)"]
d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"]
jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
uvloop = ["uvloop (>=0.15.2)"]
[[package]]
name = "click"
version = "8.1.7"
description = "Composable command line interface toolkit"
optional = false
python-versions = ">=3.7"
files = [
{file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"},
{file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"},
]
[package.dependencies]
colorama = {version = "*", markers = "platform_system == \"Windows\""}
[[package]]
name = "colorama"
version = "0.4.6"
description = "Cross-platform colored terminal text."
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
files = [
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
]
[[package]]
name = "deepdog"
version = "1.3.0"
description = ""
optional = false
python-versions = ">=3.8.1,<3.10"
files = []
develop = true
[package.dependencies]
numpy = "1.22.3"
pdme = "^1.5.0"
scipy = "1.10"
tqdm = "^4.66.2"
[package.source]
type = "directory"
url = "../deepdog"
[[package]]
name = "mypy-extensions"
version = "1.0.0"
description = "Type system extensions for programs checked with the mypy type checker."
optional = false
python-versions = ">=3.5"
files = [
{file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"},
{file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
]
[[package]]
name = "numpy"
version = "1.22.3"
description = "NumPy is the fundamental package for array computing with Python."
optional = false
python-versions = ">=3.8"
files = [
{file = "numpy-1.22.3-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:92bfa69cfbdf7dfc3040978ad09a48091143cffb778ec3b03fa170c494118d75"},
{file = "numpy-1.22.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8251ed96f38b47b4295b1ae51631de7ffa8260b5b087808ef09a39a9d66c97ab"},
{file = "numpy-1.22.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48a3aecd3b997bf452a2dedb11f4e79bc5bfd21a1d4cc760e703c31d57c84b3e"},
{file = "numpy-1.22.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3bae1a2ed00e90b3ba5f7bd0a7c7999b55d609e0c54ceb2b076a25e345fa9f4"},
{file = "numpy-1.22.3-cp310-cp310-win32.whl", hash = "sha256:f950f8845b480cffe522913d35567e29dd381b0dc7e4ce6a4a9f9156417d2430"},
{file = "numpy-1.22.3-cp310-cp310-win_amd64.whl", hash = "sha256:08d9b008d0156c70dc392bb3ab3abb6e7a711383c3247b410b39962263576cd4"},
{file = "numpy-1.22.3-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:201b4d0552831f7250a08d3b38de0d989d6f6e4658b709a02a73c524ccc6ffce"},
{file = "numpy-1.22.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f8c1f39caad2c896bc0018f699882b345b2a63708008be29b1f355ebf6f933fe"},
{file = "numpy-1.22.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:568dfd16224abddafb1cbcce2ff14f522abe037268514dd7e42c6776a1c3f8e5"},
{file = "numpy-1.22.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ca688e1b9b95d80250bca34b11a05e389b1420d00e87a0d12dc45f131f704a1"},
{file = "numpy-1.22.3-cp38-cp38-win32.whl", hash = "sha256:e7927a589df200c5e23c57970bafbd0cd322459aa7b1ff73b7c2e84d6e3eae62"},
{file = "numpy-1.22.3-cp38-cp38-win_amd64.whl", hash = "sha256:07a8c89a04997625236c5ecb7afe35a02af3896c8aa01890a849913a2309c676"},
{file = "numpy-1.22.3-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:2c10a93606e0b4b95c9b04b77dc349b398fdfbda382d2a39ba5a822f669a0123"},
{file = "numpy-1.22.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fade0d4f4d292b6f39951b6836d7a3c7ef5b2347f3c420cd9820a1d90d794802"},
{file = "numpy-1.22.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bfb1bb598e8229c2d5d48db1860bcf4311337864ea3efdbe1171fb0c5da515d"},
{file = "numpy-1.22.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97098b95aa4e418529099c26558eeb8486e66bd1e53a6b606d684d0c3616b168"},
{file = "numpy-1.22.3-cp39-cp39-win32.whl", hash = "sha256:fdf3c08bce27132395d3c3ba1503cac12e17282358cb4bddc25cc46b0aca07aa"},
{file = "numpy-1.22.3-cp39-cp39-win_amd64.whl", hash = "sha256:639b54cdf6aa4f82fe37ebf70401bbb74b8508fddcf4797f9fe59615b8c5813a"},
{file = "numpy-1.22.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c34ea7e9d13a70bf2ab64a2532fe149a9aced424cd05a2c4ba662fd989e3e45f"},
{file = "numpy-1.22.3.zip", hash = "sha256:dbc7601a3b7472d559dc7b933b18b4b66f9aa7452c120e87dfb33d02008c8a18"},
]
[[package]]
name = "packaging"
version = "24.1"
description = "Core utilities for Python packages"
optional = false
python-versions = ">=3.8"
files = [
{file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"},
{file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"},
]
[[package]]
name = "pathspec"
version = "0.12.1"
description = "Utility library for gitignore style pattern matching of file paths."
optional = false
python-versions = ">=3.8"
files = [
{file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"},
{file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"},
]
[[package]]
name = "pdme"
version = "1.5.0"
description = "Python dipole model evaluator"
optional = false
python-versions = "<3.10,>=3.8.1"
files = [
{file = "pdme-1.5.0-py3-none-any.whl", hash = "sha256:1b4fa30ba98a336957b3029563552d73286a3a5f932809ac1330e65a1f61c363"},
{file = "pdme-1.5.0.tar.gz", hash = "sha256:cc0ac4ffab2994e08b4efde2991c6d9dccb2942c7e33c4be3b52e068366526d1"},
]
[package.dependencies]
numpy = ">=1.22.3,<2.0.0"
scipy = ">=1.10,<1.11"
[[package]]
name = "platformdirs"
version = "4.2.2"
description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`."
optional = false
python-versions = ">=3.8"
files = [
{file = "platformdirs-4.2.2-py3-none-any.whl", hash = "sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee"},
{file = "platformdirs-4.2.2.tar.gz", hash = "sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3"},
]
[package.extras]
docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"]
test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"]
type = ["mypy (>=1.8)"]
[[package]]
name = "scipy"
version = "1.10.0"
description = "Fundamental algorithms for scientific computing in Python"
optional = false
python-versions = "<3.12,>=3.8"
files = [
{file = "scipy-1.10.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:b901b423c91281a974f6cd1c36f5c6c523e665b5a6d5e80fcb2334e14670eefd"},
{file = "scipy-1.10.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:16ba05d3d1b9f2141004f3f36888e05894a525960b07f4c2bfc0456b955a00be"},
{file = "scipy-1.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:151f066fe7d6653c3ffefd489497b8fa66d7316e3e0d0c0f7ff6acca1b802809"},
{file = "scipy-1.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f9ea0a37aca111a407cb98aa4e8dfde6e5d9333bae06dfa5d938d14c80bb5c3"},
{file = "scipy-1.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:27e548276b5a88b51212b61f6dda49a24acf5d770dff940bd372b3f7ced8c6c2"},
{file = "scipy-1.10.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:42ab8b9e7dc1ebe248e55f54eea5307b6ab15011a7883367af48dd781d1312e4"},
{file = "scipy-1.10.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:e096b062d2efdea57f972d232358cb068413dc54eec4f24158bcbb5cb8bddfd8"},
{file = "scipy-1.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4df25a28bd22c990b22129d3c637fd5c3be4b7c94f975dca909d8bab3309b694"},
{file = "scipy-1.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ad449db4e0820e4b42baccefc98ec772ad7818dcbc9e28b85aa05a536b0f1a2"},
{file = "scipy-1.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:6faf86ef7717891195ae0537e48da7524d30bc3b828b30c9b115d04ea42f076f"},
{file = "scipy-1.10.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:4bd0e3278126bc882d10414436e58fa3f1eca0aa88b534fcbf80ed47e854f46c"},
{file = "scipy-1.10.0-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:38bfbd18dcc69eeb589811e77fae552fa923067fdfbb2e171c9eac749885f210"},
{file = "scipy-1.10.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ab2a58064836632e2cec31ca197d3695c86b066bc4818052b3f5381bfd2a728"},
{file = "scipy-1.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5cd7a30970c29d9768a7164f564d1fbf2842bfc77b7d114a99bc32703ce0bf48"},
{file = "scipy-1.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:9b878c671655864af59c108c20e4da1e796154bd78c0ed6bb02bc41c84625686"},
{file = "scipy-1.10.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:3afcbddb4488ac950ce1147e7580178b333a29cd43524c689b2e3543a080a2c8"},
{file = "scipy-1.10.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:6e4497e5142f325a5423ff5fda2fff5b5d953da028637ff7c704378c8c284ea7"},
{file = "scipy-1.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:441cab2166607c82e6d7a8683779cb89ba0f475b983c7e4ab88f3668e268c143"},
{file = "scipy-1.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0490dc499fe23e4be35b8b6dd1e60a4a34f0c4adb30ac671e6332446b3cbbb5a"},
{file = "scipy-1.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:954ff69d2d1bf666b794c1d7216e0a746c9d9289096a64ab3355a17c7c59db54"},
{file = "scipy-1.10.0.tar.gz", hash = "sha256:c8b3cbc636a87a89b770c6afc999baa6bcbb01691b5ccbbc1b1791c7c0a07540"},
]
[package.dependencies]
numpy = ">=1.19.5,<1.27.0"
[package.extras]
dev = ["click", "doit (>=0.36.0)", "flake8", "mypy", "pycodestyle", "pydevtool", "rich-click", "typing_extensions"]
doc = ["matplotlib (>2)", "numpydoc", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-design (>=0.2.0)"]
test = ["asv", "gmpy2", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"]
[[package]]
name = "tantri"
version = "1.1.0"
description = "Python dipole model evaluator"
optional = false
python-versions = ">=3.8.1,<3.10"
files = []
develop = true
[package.dependencies]
click = "^8.1.7"
numpy = "^1.22.3"
scipy = "~1.10"
[package.source]
type = "directory"
url = "../tantri"
[[package]]
name = "tomli"
version = "2.0.1"
description = "A lil' TOML parser"
optional = false
python-versions = ">=3.7"
files = [
{file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
]
[[package]]
name = "tqdm"
version = "4.66.5"
description = "Fast, Extensible Progress Meter"
optional = false
python-versions = ">=3.7"
files = [
{file = "tqdm-4.66.5-py3-none-any.whl", hash = "sha256:90279a3770753eafc9194a0364852159802111925aa30eb3f9d85b0e805ac7cd"},
{file = "tqdm-4.66.5.tar.gz", hash = "sha256:e1020aef2e5096702d8a025ac7d16b1577279c9d63f8375b63083e9a5f0fcbad"},
]
[package.dependencies]
colorama = {version = "*", markers = "platform_system == \"Windows\""}
[package.extras]
dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"]
notebook = ["ipywidgets (>=6)"]
slack = ["slack-sdk"]
telegram = ["requests"]
[[package]]
name = "typing-extensions"
version = "4.12.2"
description = "Backported and Experimental Type Hints for Python 3.8+"
optional = false
python-versions = ">=3.8"
files = [
{file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
{file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
]
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<3.10"
content-hash = "3c0c0391fcd01dabcbc7add8d92f0dac5c218e78aa375df1e9845c514a26dd83"
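
The lock pins two local path dependencies (`deepdog` and `tantri`) in develop mode, so reproducing this environment requires sibling checkouts next to this repository. A minimal sketch, assuming the directory layout implied by the lock's `url = "../deepdog"` and `url = "../tantri"` entries:

```bash
# verify the sibling checkouts that the lock file points at actually exist
ls ../deepdog/pyproject.toml ../tantri/pyproject.toml

# poetry reads poetry.lock automatically and installs the pinned versions
poetry install
```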

pyproject.toml Executable file

@@ -0,0 +1,27 @@
[tool.poetry]
name = "single-dipole-4"
version = "0.0.0"
description = "running single dipole 4 test"
authors = ["Deepak Mallubhotla <dmallubhotla+github@gmail.com>"]
readme = "README.md"

[tool.poetry.dependencies]
python = ">=3.8.1,<3.10"
deepdog = {path = "../deepdog", develop = true}
tantri = {path = "../tantri", develop = true}
pdme = "^1.5.0"

[tool.poetry.group.dev.dependencies]
black = "^24.8.0"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.poetry.scripts]
stage01 = "single_dipole_4.stages.stage01:main"
stage02 = "single_dipole_4.stages.stage02:main"
stage03 = "single_dipole_4.stages.stage03:main"
stage04 = "single_dipole_4.stages.stage04:main"
sd4 = "single_dipole_4.stages:main"
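
The `[tool.poetry.scripts]` table above declares console entry points; a hedged usage sketch, assuming the `single_dipole_4.stages` modules expose the declared `main` functions once the package is installed:

```bash
# run an individual stage through its console script
poetry run stage01

# or drive everything through the combined entry point
poetry run sd4
```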

run.sh Executable file

@@ -0,0 +1,20 @@
#!/usr/bin/env bash
# run the pipeline stages in order; all output is appended to output.log
set -euxo pipefail

(
	mkdir -p logs
	# record the poetry version in the log for reproducibility
	poetry --version
	./01-run_make_configs.sh
	./02-run_gen.sh
	# ./03-run_probs.sh
	./04-cleanup-probs.sh
) 2>&1 | tee -a output.log
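
Because the subshell's combined stdout/stderr is teed, the log survives each run; a short usage example:

```bash
# run the pipeline; output also accumulates in output.log
./run.sh

# inspect the combined log afterwards
less output.log
```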