feat: Can now run through a config file, with a smarter directory label

This commit is contained in:
2025-02-22 02:56:41 -06:00
parent 4729543c4e
commit 2243cedd21
4 changed files with 246 additions and 224 deletions

22
kalpaa.toml Normal file
View File

@@ -0,0 +1,22 @@
# Example kalpaa run configuration; section names mirror the Config dataclasses
# (GeneralConfig, GenerationConfig, DeepdogConfig) in kalpaa.config.
[general_config]
# All run outputs are rooted here (GeneralConfig.root_directory).
root_directory = "out"
# Must be a MeasurementTypeEnum value: "electric-potential" or "x-electric-field".
measurement_type = "electric-potential"
[generation_config]
# Dipole counts to generate configurations for.
counts = [1, 5, 10]
num_replicas = 2
# Each inline table maps to a TantriConfig(index_seed_starter, num_seeds, delta_t, num_iterations).
tantri_configs = [
{index_seed_starter = 15151, num_seeds = 5, delta_t = 0.01, num_iterations = 100},
{index_seed_starter = 1234, num_seeds = 100, delta_t = 1, num_iterations = 200}
]
# Optional: bypass counts/orientations/num_replicas with explicit dipole lists
# keyed by scenario name (GenerationConfig.override_dipole_configs).
# p/s/w presumably map to DipoleTO fields (moment, position, frequency) — confirm against tantri.
[generation_config.override_dipole_configs]
scenario1 = [
{p = [3, 5, 7], s = [2, 4, 6], w = 10},
{p = [30, 50, 70], s = [20, 40, 60], w = 10.55},
]
[deepdog_config]
# Cost thresholds attempted in the given order (DeepdogConfig.costs_to_try).
costs_to_try = [5, 2, 1, 0.5, 0.2]
target_success = 2000
use_log_noise = true

View File

@@ -1,163 +1,31 @@
import json
import deepdog.indexify
from dataclasses import dataclass, field
import typing
import tantri.dipoles.types
import pathlib
from enum import Enum, IntEnum
import logging
from kalpaa.config.config import (
MeasurementTypeEnum,
SkipToStage,
GeneralConfig,
TantriConfig,
GenerationConfig,
DeepdogConfig,
Config,
ReducedModelParams,
)
from kalpaa.config.config_reader import (
read_config_dict,
serialize_config,
read_config,
read_general_config_dict,
)
_logger = logging.getLogger(__name__)
class MeasurementTypeEnum(Enum):
POTENTIAL = "electric-potential"
X_ELECTRIC_FIELD = "x-electric-field"
class SkipToStage(IntEnum):
# shouldn't need this lol
STAGE_01 = 0
STAGE_02 = 1
STAGE_03 = 2
STAGE_04 = 3
# Copy over some random constants to see if they're ever reused
@dataclass(frozen=True)
class GeneralConfig:
dots_json_name: str = "dots.json"
indexes_json_name: str = "indexes.json"
out_dir_name: str = "out"
log_pattern: str = (
"%(asctime)s | %(process)d | %(levelname)-7s | %(name)s:%(lineno)d | %(message)s"
)
measurement_type: MeasurementTypeEnum = MeasurementTypeEnum.X_ELECTRIC_FIELD
root_directory: pathlib.Path = pathlib.Path.cwd()
mega_merged_name: str = "mega_merged_coalesced.csv"
mega_merged_inferenced_name: str = "mega_merged_coalesced_inferenced.csv"
skip_to_stage: typing.Optional[int] = None
@dataclass(frozen=True)
class TantriConfig:
index_seed_starter: int = 31415
num_seeds: int = 100
delta_t: float = 0.05
num_iterations: int = 100000
# sample_rate = 10
@dataclass(frozen=True)
class GenerationConfig:
# Interact with indexes.json, probably should be a subset
counts: typing.Sequence[int] = field(default_factory=lambda: [1, 10])
orientations: typing.Sequence[tantri.dipoles.types.Orientation] = field(
default_factory=lambda: [
tantri.dipoles.types.Orientation.RANDOM,
tantri.dipoles.types.Orientation.Z,
tantri.dipoles.types.Orientation.XY,
]
)
# TODO: what's replica here?
num_replicas: int = 3
# the above three can be overrided with manually specified configurations
override_dipole_configs: typing.Optional[
typing.Mapping[str, typing.Sequence[tantri.dipoles.types.DipoleTO]]
] = None
tantri_configs: typing.List[TantriConfig] = field(
default_factory=lambda: [TantriConfig()]
)
num_bin_time_series: int = 25
bin_log_width: float = 0.25
@dataclass(frozen=True)
class DeepdogConfig:
"""
Class that holds all of the computational parameters
"""
costs_to_try: typing.Sequence[float] = field(default_factory=lambda: [10, 1, 0.1])
target_success: int = 1000
max_monte_carlo_cycles_steps: int = 20
# Whether to use a log log cost function
use_log_noise: bool = False
@dataclass(frozen=True)
class Config:
generation_config: GenerationConfig = GenerationConfig()
general_config: GeneralConfig = GeneralConfig()
deepdog_config: DeepdogConfig = DeepdogConfig()
def absify(self, filename: str) -> pathlib.Path:
ret = (self.general_config.root_directory / filename).resolve()
_logger.debug(f"Absifying {filename=}, geting {ret}")
return ret
def get_out_dir_path(self) -> pathlib.Path:
return self.absify(self.general_config.out_dir_name)
def get_dots_json_path(self) -> pathlib.Path:
return self.absify(self.general_config.dots_json_name)
def indexifier(self) -> deepdog.indexify.Indexifier:
with self.absify(self.general_config.indexes_json_name).open(
"r"
) as indexify_json_file:
indexify_spec = json.load(indexify_json_file)
indexify_data = indexify_spec["indexes"]
if "seed_spec" in indexify_spec:
seed_spec = indexify_spec["seed_spec"]
indexify_data[seed_spec["field_name"]] = list(
range(seed_spec["num_seeds"])
)
_logger.info(f"loading indexifier with data {indexify_data=}")
return deepdog.indexify.Indexifier(indexify_data)
@dataclass(frozen=True)
class ReducedModelParams:
"""
Units usually in 10s of nm for distance, s or Hz as needed for time units, log units are log base 10 of Hz or s values.
"""
x_min: float = -20
x_max: float = 20
y_min: float = -10
y_max: float = 10
z_min: float = 5
z_max: float = 6.5
w_log_min: float = -5
w_log_max: float = 1
count: int = 1
log_magnitude: float = 2
orientation: tantri.dipoles.types.Orientation = (
tantri.dipoles.types.Orientation.RANDOM
)
def config_dict(self, seed: int) -> typing.Dict[str, typing.Any]:
output_dict = {
"x_min": self.x_min,
"x_max": self.x_max,
"y_min": self.y_min,
"y_max": self.y_max,
"z_min": self.z_min,
"z_max": self.z_max,
"mag": 10**self.log_magnitude,
"w_log_min": self.w_log_min,
"w_log_max": self.w_log_max,
"orientation": self.orientation,
"dipole_count": self.count,
"generation_seed": seed,
}
return output_dict
__all__ = [
"MeasurementTypeEnum",
"SkipToStage",
"GeneralConfig",
"TantriConfig",
"GenerationConfig",
"DeepdogConfig",
"Config",
"ReducedModelParams",
"read_config_dict",
"serialize_config",
"read_config",
"read_general_config_dict",
]

163
kalpaa/config/config.py Normal file
View File

@@ -0,0 +1,163 @@
import json
import deepdog.indexify
from dataclasses import dataclass, field
import typing
import tantri.dipoles.types
import pathlib
from enum import Enum, IntEnum
import logging
_logger = logging.getLogger(__name__)
class MeasurementTypeEnum(Enum):
    """Kind of measurement a run simulates.

    Values are the exact strings accepted for measurement_type in kalpaa.toml.
    """

    POTENTIAL = "electric-potential"
    X_ELECTRIC_FIELD = "x-electric-field"
class SkipToStage(IntEnum):
    """Zero-based index of the pipeline stage to jump to (cf. --skip-to-stage)."""

    # NOTE: author flagged this as possibly unnecessary ("shouldn't need this").
    STAGE_01 = 0
    STAGE_02 = 1
    STAGE_03 = 2
    STAGE_04 = 3
# Copy over some random constants to see if they're ever reused
@dataclass(frozen=True)
class GeneralConfig:
    """Run-wide settings: file names, logging format, and stage control."""

    # JSON file with dot definitions, resolved via Config.absify.
    dots_json_name: str = "dots.json"
    # JSON file describing the index space consumed by Config.indexifier.
    indexes_json_name: str = "indexes.json"
    # Output directory name, resolved against root_directory.
    out_dir_name: str = "out"
    # Format string handed to the logging setup.
    log_pattern: str = (
        "%(asctime)s | %(process)d | %(levelname)-7s | %(name)s:%(lineno)d | %(message)s"
    )
    measurement_type: MeasurementTypeEnum = MeasurementTypeEnum.X_ELECTRIC_FIELD
    # Base directory against which all relative file names are resolved.
    root_directory: pathlib.Path = pathlib.Path.cwd()
    mega_merged_name: str = "mega_merged_coalesced.csv"
    mega_merged_inferenced_name: str = "mega_merged_coalesced_inferenced.csv"
    # Stage index to skip to (0-based, cf. SkipToStage); presumably None means
    # run all stages from the start — confirm against the stage runner.
    skip_to_stage: typing.Optional[int] = None
@dataclass(frozen=True)
class TantriConfig:
    """Parameters for one tantri time-series generation run."""

    # Base value from which per-run RNG seeds are derived — presumably combined
    # with a seed index in the generation stage; confirm there.
    index_seed_starter: int = 31415
    num_seeds: int = 100
    # Simulation time step; units not specified here.
    delta_t: float = 0.05
    num_iterations: int = 100000
    # sample_rate = 10
@dataclass(frozen=True)
class GenerationConfig:
    """Parameters controlling dipole-configuration and time-series generation."""

    # Interact with indexes.json, probably should be a subset
    # Dipole counts to generate configurations for.
    counts: typing.Sequence[int] = field(default_factory=lambda: [1, 10])
    # Dipole orientation classes to sweep over.
    orientations: typing.Sequence[tantri.dipoles.types.Orientation] = field(
        default_factory=lambda: [
            tantri.dipoles.types.Orientation.RANDOM,
            tantri.dipoles.types.Orientation.Z,
            tantri.dipoles.types.Orientation.XY,
        ]
    )
    # TODO: what's replica here? NOTE(review): presumably independent repeats
    # per configuration — confirm.
    num_replicas: int = 3
    # The above three can be overridden with manually specified configurations:
    # mapping of scenario name -> explicit list of dipoles.
    override_dipole_configs: typing.Optional[
        typing.Mapping[str, typing.Sequence[tantri.dipoles.types.DipoleTO]]
    ] = None
    # One generated time series per TantriConfig entry.
    tantri_configs: typing.List[TantriConfig] = field(
        default_factory=lambda: [TantriConfig()]
    )
    # Binning parameters for the generated time series; bin_log_width is
    # presumably a log10 width — confirm in the binning stage.
    num_bin_time_series: int = 25
    bin_log_width: float = 0.25
@dataclass(frozen=True)
class DeepdogConfig:
    """
    Class that holds all of the computational parameters
    """

    # Cost thresholds to attempt, tried in the given order.
    costs_to_try: typing.Sequence[float] = field(default_factory=lambda: [10, 1, 0.1])
    # Presumably the number of successful samples targeted per run — confirm
    # against deepdog's API.
    target_success: int = 1000
    max_monte_carlo_cycles_steps: int = 20
    # Whether to use a log log cost function
    use_log_noise: bool = False
@dataclass(frozen=True)
class Config:
    """Top-level run configuration bundling the three config sections.

    Frozen so a run's configuration cannot be mutated after it is read in;
    derive modified copies with dataclasses.replace (as the runner does for
    root_directory / skip_to_stage).
    """

    generation_config: GenerationConfig = GenerationConfig()
    general_config: GeneralConfig = GeneralConfig()
    deepdog_config: DeepdogConfig = DeepdogConfig()

    def absify(self, filename: str) -> pathlib.Path:
        """Resolve *filename* against general_config.root_directory.

        Returns an absolute path, so later working-directory changes cannot
        alter what the path refers to.
        """
        ret = (self.general_config.root_directory / filename).resolve()
        # Fixed typo in the debug message: "geting" -> "getting".
        _logger.debug(
            f"Absifying {filename=}, for root directory "
            f"{self.general_config.root_directory}, getting {ret}"
        )
        return ret

    def get_out_dir_path(self) -> pathlib.Path:
        """Absolute path of the output directory."""
        return self.absify(self.general_config.out_dir_name)

    def get_dots_json_path(self) -> pathlib.Path:
        """Absolute path of the dots JSON file."""
        return self.absify(self.general_config.dots_json_name)

    def indexifier(self) -> deepdog.indexify.Indexifier:
        """Build a deepdog Indexifier from the indexes JSON file.

        The file must contain an "indexes" mapping. If a "seed_spec" entry is
        present, an extra index named seed_spec["field_name"] is added whose
        values are range(seed_spec["num_seeds"]).
        """
        with self.absify(self.general_config.indexes_json_name).open(
            "r"
        ) as indexify_json_file:
            indexify_spec = json.load(indexify_json_file)
            indexify_data = indexify_spec["indexes"]
            if "seed_spec" in indexify_spec:
                seed_spec = indexify_spec["seed_spec"]
                indexify_data[seed_spec["field_name"]] = list(
                    range(seed_spec["num_seeds"])
                )
            _logger.info(f"loading indexifier with data {indexify_data=}")
            return deepdog.indexify.Indexifier(indexify_data)
@dataclass(frozen=True)
class ReducedModelParams:
    """
    Units usually in 10s of nm for distance, s or Hz as needed for time units, log units are log base 10 of Hz or s values.
    """

    x_min: float = -20
    x_max: float = 20
    y_min: float = -10
    y_max: float = 10
    z_min: float = 5
    z_max: float = 6.5
    w_log_min: float = -5
    w_log_max: float = 1
    count: int = 1
    log_magnitude: float = 2
    orientation: tantri.dipoles.types.Orientation = (
        tantri.dipoles.types.Orientation.RANDOM
    )

    def config_dict(self, seed: int) -> typing.Dict[str, typing.Any]:
        """Assemble the generation-parameter dict for a single seed.

        Spatial bounds are copied through by name; log_magnitude is converted
        to a linear "mag" value (10 ** log_magnitude).
        """
        spatial_keys = ("x_min", "x_max", "y_min", "y_max", "z_min", "z_max")
        params: typing.Dict[str, typing.Any] = {
            key: getattr(self, key) for key in spatial_keys
        }
        params["mag"] = 10**self.log_magnitude
        params["w_log_min"] = self.w_log_min
        params["w_log_max"] = self.w_log_max
        params["orientation"] = self.orientation
        params["dipole_count"] = self.count
        params["generation_seed"] = seed
        return params

View File

@@ -1,12 +1,13 @@
import pathlib
import logging
import dataclasses
import kalpaa.stages.stage01
import kalpaa.stages.stage02
import kalpaa.stages.stage03
import kalpaa.stages.stage04
import kalpaa.common
import tantri.dipoles.types
import kalpaa.config
import argparse
@@ -72,6 +73,22 @@ def parse_args():
default=None,
)
parser.add_argument(
"-d",
"--directory-label",
type=str,
help="Label for directory to put files in within root",
default="output1",
)
parser.add_argument(
"--config-file",
type=str,
help="kalpaa.toml file to use for configuration",
default="kalpaa.toml",
)
parser.add_argument(
"-s",
"--skip-to-stage",
@@ -86,52 +103,15 @@ def parse_args():
def main():
args = parse_args()
tantri_configs = [
kalpaa.TantriConfig(123456, 50, 0.5, 100000),
# kalpa.TantriConfig(1234, 50, 0.0005, 10000),
]
# override_config = {
# # "test1": [
# # tantri.dipoles.types.DipoleTO(
# # numpy.array([0, 0, 100]),
# # numpy.array([-2, -2, 2.9]),
# # 0.0005
# # )
# # ],
# "two_dipole_connors_geom": [
# tantri.dipoles.types.DipoleTO(
# numpy.array([0, 0, 100]), numpy.array([-2, -2, 5.75]), 0.0005
# ),
# tantri.dipoles.types.DipoleTO(
# numpy.array([0, 0, 100]), numpy.array([6, 2, 5.75]), 0.05
# ),
# ],
# "two_dipole_connors_geom_omegaswap": [
# tantri.dipoles.types.DipoleTO(
# numpy.array([0, 0, 100]), numpy.array([-2, -2, 5.75]), 0.05
# ),
# tantri.dipoles.types.DipoleTO(
# numpy.array([0, 0, 100]), numpy.array([6, 2, 5.75]), 0.0005
# ),
# ],
# }
generation_config = kalpaa.GenerationConfig(
tantri_configs=tantri_configs,
counts=[3, 31],
num_replicas=5,
# let's test this out
# override_dipole_configs=override_config,
orientations=[tantri.dipoles.types.Orientation.Z],
num_bin_time_series=25,
)
config = kalpaa.config.read_config(pathlib.Path(args.config_file))
label = args.directory_label
if args.override_root is None:
_logger.info("root dir not given")
root = pathlib.Path("plots0")
# root = pathlib.Path("hardcodedoutplace")
root = config.general_config.root_directory / label
else:
root = pathlib.Path(args.override_root)
root = pathlib.Path(args.override_root) / label
if args.skip_to_stage is not None:
if args.skip_to_stage not in [1, 2, 3, 4]:
@@ -141,31 +121,20 @@ def main():
else:
skip = None
general_config = kalpaa.GeneralConfig(
measurement_type=kalpaa.MeasurementTypeEnum.POTENTIAL,
out_dir_name=str(root / "out"),
skip_to_stage=skip,
)
_logger.info(skip)
# kalpa.GeneralConfig
kalpaa.common.set_up_logging(config, str(root / f"logs/kalpaa.log"))
deepdog_config = kalpaa.DeepdogConfig(
costs_to_try=[2, 1],
max_monte_carlo_cycles_steps=20,
target_success=200,
use_log_noise=True,
)
_logger.info(f"Root dir is {root}, copying over {config.general_config.indexes_json_name}, {config.general_config.dots_json_name} and {args.config_file}")
for file in [config.general_config.indexes_json_name, config.general_config.dots_json_name, args.config_file]:
_logger.info(f"Copying {file} to {root}")
(root / file).write_text((pathlib.Path.cwd() / file).read_text())
config = kalpaa.Config(
generation_config=generation_config,
general_config=general_config,
deepdog_config=deepdog_config,
)
kalpaa.common.set_up_logging(config, str(root / f"logs/{root}.log"))
overridden_config = dataclasses.replace(config, general_config=dataclasses.replace(config.general_config, root_directory=root.resolve(), skip_to_stage=skip))
_logger.info(f"Got {config=}")
runner = Runner(config)
runner = Runner(overridden_config)
runner.run()