Compare commits
19 Commits
SHA1
71dc906a96
24c6e311c1
4dd3004a7b
46f6b6cdf1
c8435b4b2a
c2375e6f5c
a1b59cd18b
53f8993f2b
700f32ea58
3737252c4b
6f79a49e59
d962ecb11e
7beca501bf
5425ce1362
6a5c5931d4
36ff75576c
e76c619c8b
c881da2837
1a1ecc01ea
.gitignore (vendored, 2 lines changed)
@@ -145,3 +145,5 @@ cython_debug/
*.csv

local_scripts/

.vscode
CHANGELOG.md (37 lines changed)
@@ -2,6 +2,43 @@

All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.

## [1.7.0](https://gitea.deepak.science:2222/physics/deepdog/compare/1.6.0...1.7.0) (2025-02-27)


### Features

* adds configurable skip if file exists ([24c6e31](https://gitea.deepak.science:2222/physics/deepdog/commit/24c6e311c1d3067eb98cc60e6ca38d76373bf08e))

## [1.6.0](https://gitea.deepak.science:2222/physics/deepdog/compare/1.5.0...1.6.0) (2025-02-27)


### Features

* Adds ability to parse bayesruns without timestamps ([46f6b6c](https://gitea.deepak.science:2222/physics/deepdog/commit/46f6b6cdf15c67aedf0c871d201b8db320bccbdf))
* allows negative log magnitude strings in models ([c8435b4](https://gitea.deepak.science:2222/physics/deepdog/commit/c8435b4b2a6e4b89030f53b5734eb743e2003fb7))

## [1.5.0](https://gitea.deepak.science:2222/physics/deepdog/compare/1.4.0...1.5.0) (2024-12-30)


### Features

* add configurable max number of dipoles to write ([a1b59cd](https://gitea.deepak.science:2222/physics/deepdog/commit/a1b59cd18b30359328a09210d9393f211aab30c2))
* add configurable max number of dipoles to write ([53f8993](https://gitea.deepak.science:2222/physics/deepdog/commit/53f8993f2b155228fff5cbee84f10c62eb149a1f))

## [1.4.0](https://gitea.deepak.science:2222/physics/deepdog/compare/1.3.0...1.4.0) (2024-09-04)


### Features

* add subset sim probs command for bayes for subset simulation results ([c881da2](https://gitea.deepak.science:2222/physics/deepdog/commit/c881da28370a1e51d062e1a7edaa62af6eb98d0a))
* allows some betetr matching for single_dipole runs ([5425ce1](https://gitea.deepak.science:2222/physics/deepdog/commit/5425ce1362919af4cc4dbd5813df3be8d877b198))
* indexifier now has len ([d962ecb](https://gitea.deepak.science:2222/physics/deepdog/commit/d962ecb11e929de1d9aa458b5d8e82270eff0039))


### Bug Fixes

* update log file arg names in cli scripts ([6a5c593](https://gitea.deepak.science:2222/physics/deepdog/commit/6a5c5931d4fc849d0d6a0f2b971523a0f039d559))

## [1.3.0](https://gitea.deepak.science:2222/physics/deepdog/compare/1.2.1...1.3.0) (2024-05-20)
|
@ -13,7 +13,7 @@ def parse_args() -> argparse.Namespace:
|
||||
"probs", description="Calculating probability from finished bayesrun"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--log_file",
|
||||
"--log-file",
|
||||
type=str,
|
||||
help="A filename for logging to, if not provided will only log to stderr",
|
||||
default=None,
|
||||
|
deepdog/cli/subset_sim_probs/__init__.py (new file, 5 lines)
@@ -0,0 +1,5 @@
from deepdog.cli.subset_sim_probs.main import wrapped_main

__all__ = [
    "wrapped_main",
]
deepdog/cli/subset_sim_probs/args.py (new file, 52 lines)
@@ -0,0 +1,52 @@
import argparse
import os


def parse_args() -> argparse.Namespace:
    def dir_path(path):
        if os.path.isdir(path):
            return path
        else:
            raise argparse.ArgumentTypeError(f"readable_dir:{path} is not a valid path")

    parser = argparse.ArgumentParser(
        "subset_sim_probs",
        description="Calculating probability from finished subset sim run",
    )
    parser.add_argument(
        "--log-file",
        type=str,
        help="A filename for logging to, if not provided will only log to stderr",
        default=None,
    )
    parser.add_argument(
        "--results-directory",
        "-d",
        type=dir_path,
        help="The directory to search for bayesrun files, defaulting to cwd if not passed",
        default=".",
    )
    parser.add_argument(
        "--indexify-json",
        help="A json file with the indexify config for parsing job indexes. Will skip if not present",
        default="",
    )
    parser.add_argument(
        "--outfile",
        "-o",
        type=str,
        help="output filename for coalesced data. If not provided, will not be written",
        default=None,
    )
    confirm_outfile_overwrite_group = parser.add_mutually_exclusive_group()
    confirm_outfile_overwrite_group.add_argument(
        "--never-overwrite-outfile",
        action="store_true",
        help="If a duplicate outfile is detected, skip confirmation and automatically exit early",
    )
    confirm_outfile_overwrite_group.add_argument(
        "--force-overwrite-outfile",
        action="store_true",
        help="Skips checking for duplicate outfiles and overwrites",
    )
    return parser.parse_args()
deepdog/cli/subset_sim_probs/dicts.py (new file, 136 lines)
@@ -0,0 +1,136 @@
import typing
from deepdog.results import GeneralOutput
import logging
import csv
import tqdm

_logger = logging.getLogger(__name__)


def build_model_dict(
    general_outputs: typing.Sequence[GeneralOutput],
) -> typing.Dict[
    typing.Tuple, typing.Dict[typing.Tuple, typing.Dict["str", typing.Any]]
]:
    """
    Maybe someday do something smarter with the coalescing and stuff but don't want to so i won't
    """
    # assume that everything is well formatted and the keys are the same across entire list and initialise list of keys.
    # model dict will contain a model_key: {calculation_dict} where each calculation_dict represents a single calculation for that model,
    # the uncoalesced version, keyed by the specific file keys
    model_dict: typing.Dict[
        typing.Tuple, typing.Dict[typing.Tuple, typing.Dict["str", typing.Any]]
    ] = {}

    _logger.info("building model dict")
    for out in tqdm.tqdm(general_outputs, desc="reading outputs", leave=False):
        for model_result in out.results:
            model_key = tuple(v for v in model_result.parsed_model_keys.values())
            if model_key not in model_dict:
                model_dict[model_key] = {}
            calculation_dict = model_dict[model_key]
            calculation_key = tuple(v for v in out.data.values())
            if calculation_key not in calculation_dict:
                calculation_dict[calculation_key] = {
                    "_model_key_dict": model_result.parsed_model_keys,
                    "_calculation_key_dict": out.data,
                    "num_finished_runs": int(
                        model_result.result_dict["num_finished_runs"]
                    ),
                    "num_runs": int(model_result.result_dict["num_runs"]),
                    "estimated_likelihood": float(
                        model_result.result_dict["estimated_likelihood"]
                    ),
                }
            else:
                raise ValueError(
                    f"Got {calculation_key} twice for model_key {model_key}"
                )

    return model_dict


def coalesced_dict(
    uncoalesced_model_dict: typing.Dict[
        typing.Tuple, typing.Dict[typing.Tuple, typing.Dict["str", typing.Any]]
    ],
):
    """
    pass in uncoalesced dict
    the minimum_count field is what we use to make sure our probs are never zero
    """
    coalesced_dict = {}

    # we are already iterating so for no reason because performance really doesn't matter let's count the keys ourselves
    num_keys = 0

    # first pass coalesce
    for model_key, model_dict in uncoalesced_model_dict.items():
        num_keys += 1
        for calculation in model_dict.values():
            if model_key not in coalesced_dict:
                coalesced_dict[model_key] = {
                    "_model_key_dict": calculation["_model_key_dict"].copy(),
                    "calculations_coalesced": 1,
                    "num_finished_runs": calculation["num_finished_runs"],
                    "num_runs": calculation["num_runs"],
                    "estimated_likelihood": calculation["estimated_likelihood"],
                }
            else:
                _logger.error(f"We shouldn't be here! Double key for {model_key=}")
                raise ValueError()

    # second pass do probability calculation

    prior = 1 / num_keys
    _logger.info(f"Got {num_keys} model keys, so our prior will be {prior}")

    total_weight = 0
    for coalesced_model_dict in coalesced_dict.values():
        model_weight = coalesced_model_dict["estimated_likelihood"] * prior
        total_weight += model_weight

    total_prob = 0
    for coalesced_model_dict in coalesced_dict.values():
        likelihood = coalesced_model_dict["estimated_likelihood"]
        prob = likelihood * prior / total_weight
        coalesced_model_dict["prob"] = prob
        total_prob += prob

    _logger.debug(
        f"Got a total probability of {total_prob}, which should be close to 1 up to float/rounding error"
    )
    return coalesced_dict


def write_coalesced_dict(
    coalesced_output_filename: typing.Optional[str],
    coalesced_model_dict: typing.Dict[typing.Tuple, typing.Dict["str", typing.Any]],
):
    if coalesced_output_filename is None or coalesced_output_filename == "":
        _logger.warning("Not provided a uncoalesced filename, not going to try")
        return

    first_value = next(iter(coalesced_model_dict.values()))
    model_field_names = set(first_value["_model_key_dict"].keys())
    _logger.info(f"Detected model field names {model_field_names}")

    collected_fieldnames = list(model_field_names)
    collected_fieldnames.extend(
        ["calculations_coalesced", "num_finished_runs", "num_runs", "prob"]
    )
    with open(coalesced_output_filename, "w", newline="") as coalesced_output_file:
        writer = csv.DictWriter(coalesced_output_file, fieldnames=collected_fieldnames)
        writer.writeheader()

        for model_dict in coalesced_model_dict.values():
            row = model_dict["_model_key_dict"].copy()
            row.update(
                {
                    "calculations_coalesced": model_dict["calculations_coalesced"],
                    "num_finished_runs": model_dict["num_finished_runs"],
                    "num_runs": model_dict["num_runs"],
                    "prob": model_dict["prob"],
                }
            )
            writer.writerow(row)
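The second pass in coalesced_dict above assigns a uniform prior of 1/num_keys and then normalizes by the total likelihood-weighted mass, so each model's probability reduces to likelihood / sum(likelihoods). A minimal standalone sketch of that arithmetic, with made-up likelihood values rather than the deepdog data structures:

# Three hypothetical models, uniform prior 1/3.
likelihoods = {"model_a": 0.02, "model_b": 0.01, "model_c": 0.07}
prior = 1 / len(likelihoods)
total_weight = sum(lh * prior for lh in likelihoods.values())
probs = {k: lh * prior / total_weight for k, lh in likelihoods.items()}
print(probs)                 # {'model_a': 0.2, 'model_b': 0.1, 'model_c': 0.7}
print(sum(probs.values()))   # 1.0, up to float rounding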
deepdog/cli/subset_sim_probs/main.py (new file, 113 lines)
@@ -0,0 +1,113 @@
import logging
import argparse
import json

import deepdog.cli.subset_sim_probs.args
import deepdog.cli.subset_sim_probs.dicts
import deepdog.cli.util
import deepdog.results
import deepdog.indexify
import pathlib
import tqdm
import os
import tqdm.contrib.logging


_logger = logging.getLogger(__name__)


def set_up_logging(log_file: str):

    log_pattern = "%(asctime)s | %(levelname)-7s | %(name)s:%(lineno)d | %(message)s"
    if log_file is None:
        handlers = [
            logging.StreamHandler(),
        ]
    else:
        handlers = [logging.StreamHandler(), logging.FileHandler(log_file)]
    logging.basicConfig(
        level=logging.DEBUG,
        format=log_pattern,
        # it's okay to ignore this mypy error because who cares about logger handler types
        handlers=handlers,  # type: ignore
    )
    logging.captureWarnings(True)


def main(args: argparse.Namespace):
    """
    Main function with passed in arguments and no additional logging setup in case we want to extract out later
    """

    with tqdm.contrib.logging.logging_redirect_tqdm():
        _logger.info(f"args: {args}")

        if "outfile" in args and args.outfile:
            if os.path.exists(args.outfile):
                if args.never_overwrite_outfile:
                    _logger.warning(
                        f"Filename {args.outfile} already exists, and never want overwrite, so aborting."
                    )
                    return
                elif args.force_overwrite_outfile:
                    _logger.warning(f"Forcing overwrite of {args.outfile}")
                else:
                    # need to confirm
                    confirm_overwrite = deepdog.cli.util.confirm_prompt(
                        f"Filename {args.outfile} exists, overwrite?"
                    )
                    if not confirm_overwrite:
                        _logger.warning(
                            f"Filename {args.outfile} already exists and do not want overwrite, aborting."
                        )
                        return
                    else:
                        _logger.warning(f"Overwriting file {args.outfile}")

        indexifier = None
        if args.indexify_json:
            with open(args.indexify_json, "r") as indexify_json_file:
                indexify_spec = json.load(indexify_json_file)
                indexify_data = indexify_spec["indexes"]
                if "seed_spec" in indexify_spec:
                    seed_spec = indexify_spec["seed_spec"]
                    indexify_data[seed_spec["field_name"]] = list(
                        range(seed_spec["num_seeds"])
                    )
                # _logger.debug(f"Indexifier data looks like {indexify_data}")
                indexifier = deepdog.indexify.Indexifier(indexify_data)

        results_dir = pathlib.Path(args.results_directory)
        out_files = [
            f for f in results_dir.iterdir() if f.name.endswith("subsetsim.csv")
        ]
        _logger.info(
            f"Reading {len(out_files)} subsetsim.csv files in directory {args.results_directory}"
        )
        # _logger.info(out_files)
        parsed_output_files = [
            deepdog.results.read_subset_sim_file(f, indexifier)
            for f in tqdm.tqdm(out_files, desc="reading files", leave=False)
        ]

        # Refactor here to allow for arbitrary likelihood file sources
        _logger.info("building uncoalesced dict")
        uncoalesced_dict = deepdog.cli.subset_sim_probs.dicts.build_model_dict(
            parsed_output_files
        )

        _logger.info("building coalesced dict")
        coalesced = deepdog.cli.subset_sim_probs.dicts.coalesced_dict(uncoalesced_dict)

        if "outfile" in args and args.outfile:
            deepdog.cli.subset_sim_probs.dicts.write_coalesced_dict(
                args.outfile, coalesced
            )
        else:
            _logger.info("Skipping writing coalesced")


def wrapped_main():
    args = deepdog.cli.subset_sim_probs.args.parse_args()
    set_up_logging(args.log_file)
    main(args)
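For reference, main() expects the --indexify-json file to be a JSON object with an "indexes" mapping and an optional "seed_spec"; only those key names come from the code above, while the index names and values in this sketch are made up:

import deepdog.indexify

# Hypothetical indexify spec as it would be loaded from the JSON file.
indexify_spec = {
    "indexes": {
        "orientation": ["free", "fixedxy", "fixedz"],
        "avg_filled": [1, 5, 10],
    },
    "seed_spec": {"field_name": "seed", "num_seeds": 100},
}
indexify_data = indexify_spec["indexes"]
seed_spec = indexify_spec["seed_spec"]
indexify_data[seed_spec["field_name"]] = list(range(seed_spec["num_seeds"]))
indexifier = deepdog.indexify.Indexifier(indexify_data)
# 3 * 3 * 100 = 900 job-index combinations for the indexifier to enumerate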
deepdog/cli/util/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
from deepdog.cli.util.confirm import confirm_prompt

__all__ = ["confirm_prompt"]
deepdog/cli/util/confirm.py (new file, 23 lines)
@@ -0,0 +1,23 @@
_RESPONSE_MAP = {
    "yes": True,
    "ye": True,
    "y": True,
    "no": False,
    "n": False,
    "nope": False,
    "true": True,
    "false": False,
}


def confirm_prompt(question: str) -> bool:
    """Prompt with the question and returns yes or no based on response."""
    prompt = question + " [y/n]: "

    while True:
        choice = input(prompt).lower()

        if choice in _RESPONSE_MAP:
            return _RESPONSE_MAP[choice]
        else:
            print('Respond with "yes" or "no"')
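A quick usage sketch of the new confirmation helper; it loops on stdin until it sees one of the _RESPONSE_MAP keys, so the responses shown are illustrative:

from deepdog.cli.util import confirm_prompt

# "y"/"ye"/"yes"/"true" map to True, "n"/"no"/"nope"/"false" map to False.
if confirm_prompt("Filename out.csv exists, overwrite?"):
    print("overwriting")
else:
    print("aborting")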
@@ -1,3 +1,5 @@
+import re
+import pathlib
 import csv
 import pdme.model
 import pdme.measurement
@@ -36,9 +38,35 @@ class DirectMonteCarloConfig:
     tag: str = ""
     cap_core_count: int = 0  # 0 means cap at num cores - 1
     chunk_size: int = 50
-    write_bayesrun_file = True
-    bayesrun_file_timestamp = True
-    # chunk size of some kind
+    write_bayesrun_file: bool = True
+    bayesrun_file_timestamp: bool = True
+    skip_if_exists: bool = False
+
+    def get_filename(self) -> str:
+        """
+        Generate a filename for the output of this run.
+        """
+        # set starting execution timestamp
+        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
+
+        if self.bayesrun_file_timestamp:
+            timestamp_str = f"{timestamp}-"
+        else:
+            timestamp_str = ""
+        filename = f"{timestamp_str}{self.tag}.realdata.fast_filter.bayesrun.csv"
+        _logger.debug(f"Got filename {filename}")
+        return filename
+
+    def get_filename_regex(self) -> str:
+        """
+        Generate a regex for the output of this run.
+        """
+
+        # having both timestamp and the hyphen separately optional is a bit of a hack
+        # too loose, but will never matter
+        pattern = rf"(?P<timestamp>\d{{8}}-\d{{6}})?-?{self.tag}\.realdata\.fast_filter\.bayesrun\.csv"
+        return pattern
+


 # Aliasing dict as a generic data container
@@ -145,15 +173,21 @@ class DirectMonteCarloRun:
         single run wrapped up for multiprocessing call.

         takes in a tuple of arguments corresponding to
-        (model_name_pair, seed)
+        (model_name_pair, seed, return_configs)
+
+        return_configs is a boolean, if true then will return tuple of (count, [matching configs])
+        if false, return (count, [])
         """
         # here's where we do our work

-        model_name_pair, seed = args
+        model_name_pair, seed, return_configs = args
         cycle_success_configs = self._single_run(model_name_pair, seed)
         cycle_success_count = len(cycle_success_configs)

-        return cycle_success_count
+        if return_configs:
+            return (cycle_success_count, cycle_success_configs)
+        else:
+            return (cycle_success_count, [])

     def execute_no_multiprocessing(self) -> Sequence[DirectMonteCarloResult]:

@@ -198,9 +232,11 @@ class DirectMonteCarloRun:
                 )
                 dipole_count = numpy.array(cycle_success_configs).shape[1]
                 for n in range(dipole_count):
+                    number_dipoles_to_write = self.config.target_success * 5
+                    _logger.info(f"Limiting to {number_dipoles_to_write=}")
                     numpy.savetxt(
                         f"{self.config.tag}_{step_count}_{cycle_i}_dipole_{n}.csv",
-                        sorted_by_freq[:, n],
+                        sorted_by_freq[:number_dipoles_to_write, n],
                         delimiter=",",
                     )
             total_success += cycle_success_count
@@ -222,8 +258,27 @@ class DirectMonteCarloRun:

     def execute(self) -> Sequence[DirectMonteCarloResult]:

-        # set starting execution timestamp
-        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
+        filename = self.config.get_filename()
+        if self.config.skip_if_exists:
+            _logger.info(f"Checking if {filename} exists")
+            cwd = pathlib.Path.cwd()
+            if (cwd / filename).exists():
+                _logger.info(f"File {filename} exists, skipping")
+                return []
+            if self.config.bayesrun_file_timestamp:
+                _logger.info(
+                    "Also need to check file endings because of possible past or current timestamps, check only occurs if writing timestamp is set"
+                )
+                pattern = self.config.get_filename_regex()
+                for file in cwd.iterdir():
+                    match = re.match(pattern, file.name)
+                    if match is not None:
+                        _logger.info(f"Matched {file.name} to {pattern}")
+                        _logger.info(f"File {filename} exists, skipping")
+                        return []
+                _logger.info(
+                    f"Finished checking against pattern {pattern}, hopefully didn't take too long!"
+                )

         count_per_step = (
             self.config.monte_carlo_count_per_cycle * self.config.monte_carlo_cycles
@@ -259,15 +314,71 @@ class DirectMonteCarloRun:

                 seeds = seed_sequence.spawn(self.config.monte_carlo_cycles)

-                pool_results = sum(
+                raw_pool_results = list(
                     pool.imap_unordered(
                         self._wrapped_single_run,
-                        [(model_name_pair, seed) for seed in seeds],
+                        [
+                            (
+                                model_name_pair,
+                                seed,
+                                self.config.write_successes_to_file,
+                            )
+                            for seed in seeds
+                        ],
+                        self.config.chunk_size,
                     )
                 )
+
+                pool_results = sum(result[0] for result in raw_pool_results)

                 _logger.debug(f"Pool results: {pool_results}")

+                if self.config.write_successes_to_file:
+
+                    _logger.info("Writing dipole results")
+
+                    cycle_success_configs = numpy.concatenate(
+                        [result[1] for result in raw_pool_results]
+                    )
+
+                    dipole_count = numpy.array(cycle_success_configs).shape[1]
+
+                    max_number_dipoles_to_write = self.config.target_success * 5
+                    _logger.debug(
+                        f"Limiting to {max_number_dipoles_to_write=}, have {len(cycle_success_configs)}"
+                    )
+
+                    if len(cycle_success_configs):
+                        sorted_by_freq = numpy.array(
+                            [
+                                pdme.subspace_simulation.sort_array_of_dipoles_by_frequency(
+                                    dipole_config
+                                )
+                                for dipole_config in cycle_success_configs[
+                                    :max_number_dipoles_to_write
+                                ]
+                            ]
+                        )
+
+                        for n in range(dipole_count):
+
+                            dipole_filename = (
+                                f"{self.config.tag}_{step_count}_dipole_{n}.csv"
+                            )
+                            _logger.debug(
+                                f"Writing {min(len(cycle_success_configs), max_number_dipoles_to_write)} to {dipole_filename}"
+                            )
+
+                            numpy.savetxt(
+                                dipole_filename,
+                                sorted_by_freq[:, n],
+                                delimiter=",",
+                            )
+                    else:
+                        _logger.debug(
+                            "Instructed to write results, but none obtained"
+                        )
+
                 total_success += pool_results
                 total_count += count_per_step
                 _logger.debug(
@@ -285,14 +396,6 @@ class DirectMonteCarloRun:

         if self.config.write_bayesrun_file:

-            if self.config.bayesrun_file_timestamp:
-                timestamp_str = f"{timestamp}-"
-            else:
-                timestamp_str = ""
-            filename = (
-                f"{timestamp_str}{self.config.tag}.realdata.fast_filter.bayesrun.csv"
-            )
-
             _logger.info(f"Going to write to file [{filename}]")
             # row: Dict[str, Union[int, float, str]] = {}
             row = {}
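A hedged sketch of how the new skip_if_exists option fits with the deterministic filename; the tag is made up and every other config field is left at its default, which is an assumption since the full constructor is not shown in this diff:

import deepdog.direct_monte_carlo

# With bayesrun_file_timestamp=False the filename is deterministic, so a run with
# skip_if_exists=True can cheaply check for it and return [] early.
config = deepdog.direct_monte_carlo.DirectMonteCarloConfig(
    tag="dots_test",
    bayesrun_file_timestamp=False,
    skip_if_exists=True,
)
print(config.get_filename())        # dots_test.realdata.fast_filter.bayesrun.csv
print(config.get_filename_regex())  # matches that name with or without a timestamp prefix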
@@ -36,6 +36,10 @@ class Indexifier:
     def indexify(self, n: int) -> typing.Dict[str, typing.Any]:
         return self.product_dict[n]

+    def __len__(self) -> int:
+        weights = [len(v) for v in self.dict.values()]
+        return math.prod(weights)
+
     def _indexify_indices(self, n: int) -> typing.Sequence[int]:
         """
         legacy indexify from old scripts, copypast.
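The new __len__ is just the product of the per-key list lengths, which is what the updated indexifier tests further down check; for example:

import deepdog.indexify

# Two keys with 3 values each -> 3 * 3 = 9 total index combinations.
indexifier = deepdog.indexify.Indexifier({"key_1": [1, 2, 3], "key_2": ["a", "b", "c"]})
assert len(indexifier) == 9
assert indexifier.indexify(0) == {"key_1": 1, "key_2": "a"}
assert indexifier.indexify(5) == {"key_1": 2, "key_2": "c"}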
@@ -5,6 +5,13 @@ import logging
 import deepdog.indexify
 import pathlib
 import csv
+from deepdog.results.read_csv import (
+    parse_bayesrun_row,
+    BayesrunModelResult,
+    parse_general_row,
+    GeneralModelResult,
+)
+from deepdog.results.filename import parse_file_slug

 _logger = logging.getLogger(__name__)

@@ -12,67 +19,24 @@ FILENAME_REGEX = re.compile(
     r"(?P<timestamp>\d{8}-\d{6})-(?P<filename_slug>.*)\.realdata\.fast_filter\.bayesrun\.csv"
 )

-MODEL_REGEXES = [
-    re.compile(pattern)
-    for pattern in [
-        r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
-        r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
-        r"geom_(?P<xmin>-?\d*\.?\d+)_(?P<xmax>-?\d*\.?\d+)_(?P<ymin>-?\d*\.?\d+)_(?P<ymax>-?\d*\.?\d+)_(?P<zmin>-?\d*\.?\d+)_(?P<zmax>-?\d*\.?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
-    ]
-]
+# probably a better way but who cares
+NO_TIMESTAMP_FILENAME_REGEX = re.compile(
+    r"(?P<filename_slug>.*)\.realdata\.fast_filter\.bayesrun\.csv"
+)

-FILE_SLUG_REGEXES = [
-    re.compile(pattern)
-    for pattern in [
-        r"(?P<tag>\w+)-(?P<job_index>\d+)",
-        r"mock_tarucha-(?P<job_index>\d+)",
-        r"(?:(?P<mock>mock)_)?tarucha(?:_(?P<tarucha_run_id>\d+))?-(?P<job_index>\d+)",
-    ]
-]
-
 SIMPLE_TAG_REGEX = re.compile(r"\w+-\d+")
+SUBSET_SIM_FILENAME_REGEX = re.compile(
+    r"(?P<filename_slug>.*)-(?:no_adaptive_steps_)?(?P<num_ss_runs>\d+)-nc_(?P<n_c>\d+)-ns_(?P<n_s>\d+)-mmax_(?P<mmax>\d+)\.multi\.subsetsim\.csv"
+)


 @dataclasses.dataclass
 class BayesrunOutputFilename:
-    timestamp: str
+    timestamp: typing.Optional[str]
     filename_slug: str
     path: pathlib.Path


-class BayesrunColumnParsed:
-    """
-    class for parsing a bayesrun while pulling certain special fields out
-    """
-
-    def __init__(self, groupdict: typing.Dict[str, str]):
-        self.column_field = groupdict["field_name"]
-        self.model_field_dict = {
-            k: v for k, v in groupdict.items() if k != "field_name"
-        }
-        self._groupdict_str = repr(groupdict)
-
-    def __str__(self):
-        return f"BayesrunColumnParsed[{self.column_field}: {self.model_field_dict}]"
-
-    def __repr__(self):
-        return f"BayesrunColumnParsed({self._groupdict_str})"
-
-    def __eq__(self, other):
-        if isinstance(other, BayesrunColumnParsed):
-            return (self.column_field == other.column_field) and (
-                self.model_field_dict == other.model_field_dict
-            )
-        return NotImplemented
-
-
-@dataclasses.dataclass
-class BayesrunModelResult:
-    parsed_model_keys: typing.Dict[str, str]
-    success: int
-    count: int
-
-
 @dataclasses.dataclass
 class BayesrunOutput:
     filename: BayesrunOutputFilename
@@ -80,88 +44,52 @@ class BayesrunOutput:
     results: typing.Sequence[BayesrunModelResult]


-def _batch_iterable_into_chunks(iterable, n=1):
-    """
-    utility for batching bayesrun files where columns appear in threes
-    """
-    for ndx in range(0, len(iterable), n):
-        yield iterable[ndx : min(ndx + n, len(iterable))]
+@dataclasses.dataclass
+class GeneralOutput:
+    filename: BayesrunOutputFilename
+    data: typing.Dict["str", typing.Any]
+    results: typing.Sequence[GeneralModelResult]


-def _parse_bayesrun_column(
-    column: str,
-) -> typing.Optional[BayesrunColumnParsed]:
-    """
-    Tries one by one all of a predefined list of regexes that I might have used in the past.
-    Returns the groupdict for the first match, or None if no match found.
-    """
-    for pattern in MODEL_REGEXES:
-        match = pattern.match(column)
-        if match:
-            return BayesrunColumnParsed(match.groupdict())
+def _parse_string_output_filename(
+    filename: str,
+) -> typing.Tuple[typing.Optional[str], str]:
+    if match := FILENAME_REGEX.match(filename):
+        groups = match.groupdict()
+        return (groups["timestamp"], groups["filename_slug"])
+    elif match := NO_TIMESTAMP_FILENAME_REGEX.match(filename):
+        groups = match.groupdict()
+        return (None, groups["filename_slug"])
     else:
-        return None
-
-
-def _parse_bayesrun_row(
-    row: typing.Dict[str, str],
-) -> typing.Sequence[BayesrunModelResult]:
-
-    results = []
-    batched_keys = _batch_iterable_into_chunks(list(row.keys()), 3)
-    for model_keys in batched_keys:
-        parsed = [_parse_bayesrun_column(column) for column in model_keys]
-        values = [row[column] for column in model_keys]
-        if parsed[0] is None:
-            raise ValueError(f"no viable success row found for keys {model_keys}")
-        if parsed[1] is None:
-            raise ValueError(f"no viable count row found for keys {model_keys}")
-        if parsed[0].column_field != "success":
-            raise ValueError(f"The column {model_keys[0]} is not a success field")
-        if parsed[1].column_field != "count":
-            raise ValueError(f"The column {model_keys[1]} is not a count field")
-        parsed_keys = parsed[0].model_field_dict
-        success = int(values[0])
-        count = int(values[1])
-        results.append(
-            BayesrunModelResult(
-                parsed_model_keys=parsed_keys,
-                success=success,
-                count=count,
-            )
-        )
-    return results
+        raise ValueError(f"Could not parse {filename} as a bayesrun output filename")


 def _parse_output_filename(file: pathlib.Path) -> BayesrunOutputFilename:
     filename = file.name
-    match = FILENAME_REGEX.match(filename)
+    timestamp, slug = _parse_string_output_filename(filename)
+    return BayesrunOutputFilename(timestamp=timestamp, filename_slug=slug, path=file)
+
+
+def _parse_ss_output_filename(file: pathlib.Path) -> BayesrunOutputFilename:
+    filename = file.name
+    match = SUBSET_SIM_FILENAME_REGEX.match(filename)
     if not match:
-        raise ValueError(f"{filename} was not a valid bayesrun output")
+        raise ValueError(f"{filename} was not a valid subset sim output")
     groups = match.groupdict()
     return BayesrunOutputFilename(
-        timestamp=groups["timestamp"], filename_slug=groups["filename_slug"], path=file
+        filename_slug=groups["filename_slug"], path=file, timestamp=None
     )


-def _parse_file_slug(slug: str) -> typing.Optional[typing.Dict[str, str]]:
-    for pattern in FILE_SLUG_REGEXES:
-        match = pattern.match(slug)
-        if match:
-            return match.groupdict()
-    else:
-        return None
-
-
-def read_output_file(
+def read_subset_sim_file(
     file: pathlib.Path, indexifier: typing.Optional[deepdog.indexify.Indexifier]
-) -> BayesrunOutput:
+) -> GeneralOutput:

-    parsed_filename = tag = _parse_output_filename(file)
-    out = BayesrunOutput(filename=parsed_filename, data={}, results=[])
+    parsed_filename = tag = _parse_ss_output_filename(file)
+    out = GeneralOutput(filename=parsed_filename, data={}, results=[])

     out.data.update(dataclasses.asdict(tag))
-    parsed_tag = _parse_file_slug(parsed_filename.filename_slug)
+    parsed_tag = parse_file_slug(parsed_filename.filename_slug)
     if parsed_tag is None:
         _logger.warning(
             f"Could not parse {tag} against any matching regexes. Going to skip tag parsing"
@@ -186,8 +114,53 @@
             row = rows[0]
         else:
             raise ValueError(f"Confused about having multiple rows in {file.name}")
-    results = _parse_bayesrun_row(row)
+    results = parse_general_row(
+        row, ("num_finished_runs", "num_runs", None, "estimated_likelihood")
+    )

     out.results = results

     return out
+
+
+def read_output_file(
+    file: pathlib.Path, indexifier: typing.Optional[deepdog.indexify.Indexifier]
+) -> BayesrunOutput:
+
+    parsed_filename = tag = _parse_output_filename(file)
+    out = BayesrunOutput(filename=parsed_filename, data={}, results=[])
+
+    out.data.update(dataclasses.asdict(tag))
+    parsed_tag = parse_file_slug(parsed_filename.filename_slug)
+    if parsed_tag is None:
+        _logger.warning(
+            f"Could not parse {tag} against any matching regexes. Going to skip tag parsing"
+        )
+    else:
+        out.data.update(parsed_tag)
+        if indexifier is not None:
+            try:
+                job_index = parsed_tag["job_index"]
+                indexified = indexifier.indexify(int(job_index))
+                out.data.update(indexified)
+            except KeyError:
+                # This isn't really that important of an error, apart from the warning
+                _logger.warning(
+                    f"Parsed tag to {parsed_tag}, and attempted to indexify but no job_index key was found. skipping and moving on"
+                )
+
+    with file.open() as input_file:
+        reader = csv.DictReader(input_file)
+        rows = [r for r in reader]
+        if len(rows) == 1:
+            row = rows[0]
+        else:
+            raise ValueError(f"Confused about having multiple rows in {file.name}")
+    results = parse_bayesrun_row(row)
+
+    out.results = results
+
+    return out
+
+
+__all__ = ["read_output_file", "BayesrunOutput"]
deepdog/results/filename.py (new file, 22 lines)
@@ -0,0 +1,22 @@
import re
import typing


FILE_SLUG_REGEXES = [
    re.compile(pattern)
    for pattern in [
        r"(?P<tag>\w+)-(?P<job_index>\d+)",
        r"mock_tarucha-(?P<job_index>\d+)",
        r"(?:(?P<mock>mock)_)?tarucha(?:_(?P<tarucha_run_id>\d+))?-(?P<job_index>\d+)",
        r"(?P<tag>\w+)-(?P<included_dots>[\w,]+)-(?P<target_cost>\d*\.?\d+)-(?P<job_index>\d+)",
    ]
]


def parse_file_slug(slug: str) -> typing.Optional[typing.Dict[str, str]]:
    for pattern in FILE_SLUG_REGEXES:
        match = pattern.match(slug)
        if match:
            return match.groupdict()
    else:
        return None
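The newly added fourth slug pattern is the one the dot1-dot1-2-0 slugs in the new filename tests rely on; a small sketch of what parse_file_slug returns for such a slug (all captured values are strings):

from deepdog.results.filename import parse_file_slug

# Hits the tag-dots-cost-index pattern after the earlier patterns fail to match.
assert parse_file_slug("dot1-dot1-2-0") == {
    "tag": "dot1",
    "included_dots": "dot1",
    "target_cost": "2",
    "job_index": "0",
}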
deepdog/results/read_csv.py (new file, 141 lines)
@@ -0,0 +1,141 @@
import typing
import re
import dataclasses

MODEL_REGEXES = [
    re.compile(pattern)
    for pattern in [
        r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
        r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
        r"geom_(?P<xmin>-?\d*\.?\d+)_(?P<xmax>-?\d*\.?\d+)_(?P<ymin>-?\d*\.?\d+)_(?P<ymax>-?\d*\.?\d+)_(?P<zmin>-?\d*\.?\d+)_(?P<zmax>-?\d*\.?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
        r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-magnitude_(?P<log_magnitude>-?\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
        r"geom_(?P<xmin>-?\d*\.?\d+)_(?P<xmax>-?\d*\.?\d+)_(?P<ymin>-?\d*\.?\d+)_(?P<ymax>-?\d*\.?\d+)_(?P<zmin>-?\d*\.?\d+)_(?P<zmax>-?\d*\.?\d+)-magnitude_(?P<log_magnitude>-?\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
    ]
]


@dataclasses.dataclass
class BayesrunModelResult:
    parsed_model_keys: typing.Dict[str, str]
    success: int
    count: int


@dataclasses.dataclass
class GeneralModelResult:
    parsed_model_keys: typing.Dict[str, str]
    result_dict: typing.Dict[str, str]


class BayesrunColumnParsed:
    """
    class for parsing a bayesrun while pulling certain special fields out
    """

    def __init__(self, groupdict: typing.Dict[str, str]):
        self.column_field = groupdict["field_name"]
        self.model_field_dict = {
            k: v for k, v in groupdict.items() if k != "field_name"
        }
        self._groupdict_str = repr(groupdict)

    def __str__(self):
        return f"BayesrunColumnParsed[{self.column_field}: {self.model_field_dict}]"

    def __repr__(self):
        return f"BayesrunColumnParsed({self._groupdict_str})"

    def __eq__(self, other):
        if isinstance(other, BayesrunColumnParsed):
            return (self.column_field == other.column_field) and (
                self.model_field_dict == other.model_field_dict
            )
        return NotImplemented


def _parse_bayesrun_column(
    column: str,
) -> typing.Optional[BayesrunColumnParsed]:
    """
    Tries one by one all of a predefined list of regexes that I might have used in the past.
    Returns the groupdict for the first match, or None if no match found.
    """
    for pattern in MODEL_REGEXES:
        match = pattern.match(column)
        if match:
            return BayesrunColumnParsed(match.groupdict())
    else:
        return None


def _batch_iterable_into_chunks(iterable, n=1):
    """
    utility for batching bayesrun files where columns appear in threes
    """
    for ndx in range(0, len(iterable), n):
        yield iterable[ndx : min(ndx + n, len(iterable))]


def parse_general_row(
    row: typing.Dict[str, str],
    expected_fields: typing.Sequence[typing.Optional[str]],
) -> typing.Sequence[GeneralModelResult]:
    results = []
    batched_keys = _batch_iterable_into_chunks(list(row.keys()), len(expected_fields))
    for model_keys in batched_keys:
        parsed = [_parse_bayesrun_column(column) for column in model_keys]
        values = [row[column] for column in model_keys]

        result_dict = {}
        parsed_keys = None
        for expected_field, parsed_field, value in zip(expected_fields, parsed, values):
            if expected_field is None:
                continue
            if parsed_field is None:
                raise ValueError(
                    f"No viable row found for {expected_field=} in {model_keys=}"
                )
            if parsed_field.column_field != expected_field:
                raise ValueError(
                    f"The column {parsed_field.column_field} does not match expected {expected_field}"
                )
            result_dict[expected_field] = value
            if parsed_keys is None:
                parsed_keys = parsed_field.model_field_dict

        if parsed_keys is None:
            raise ValueError(f"Somehow parsed keys is none here, for {row=}")
        results.append(
            GeneralModelResult(parsed_model_keys=parsed_keys, result_dict=result_dict)
        )
    return results


def parse_bayesrun_row(
    row: typing.Dict[str, str],
) -> typing.Sequence[BayesrunModelResult]:

    results = []
    batched_keys = _batch_iterable_into_chunks(list(row.keys()), 3)
    for model_keys in batched_keys:
        parsed = [_parse_bayesrun_column(column) for column in model_keys]
        values = [row[column] for column in model_keys]
        if parsed[0] is None:
            raise ValueError(f"no viable success row found for keys {model_keys}")
        if parsed[1] is None:
            raise ValueError(f"no viable count row found for keys {model_keys}")
        if parsed[0].column_field != "success":
            raise ValueError(f"The column {model_keys[0]} is not a success field")
        if parsed[1].column_field != "count":
            raise ValueError(f"The column {model_keys[1]} is not a count field")
        parsed_keys = parsed[0].model_field_dict
        success = int(values[0])
        count = int(values[1])
        results.append(
            BayesrunModelResult(
                parsed_model_keys=parsed_keys,
                success=success,
                count=count,
            )
        )
    return results
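As a rough illustration of parse_general_row: columns are batched in groups of len(expected_fields), each column name is a model slug plus a trailing field name, and a None slot simply skips that column. The column names below are made up but follow the geom_... pattern the MODEL_REGEXES parse:

from deepdog.results.read_csv import parse_general_row

# One model, four columns matching the four expected_fields slots; the third slot
# is None so the "prob" column is read but ignored.
prefix = "geom_-20_20_-10_10_0_5-orientation_free-dipole_count_100_"
row = {
    prefix + "num_finished_runs": "100",
    prefix + "num_runs": "1000",
    prefix + "prob": "0.1",
    prefix + "estimated_likelihood": "0.0004",
}
results = parse_general_row(
    row, ("num_finished_runs", "num_runs", None, "estimated_likelihood")
)
print(results[0].parsed_model_keys["orientation"])      # "free"
print(results[0].result_dict["estimated_likelihood"])   # "0.0004", still a string here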
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "deepdog"
-version = "1.3.0"
+version = "1.7.0"
 description = ""
 authors = ["Deepak Mallubhotla <dmallubhotla+github@gmail.com>"]

@@ -22,6 +22,7 @@ syrupy = "^4.0.8"

 [tool.poetry.scripts]
 probs = "deepdog.cli.probs:wrapped_main"
+subset_sim_probs = "deepdog.cli.subset_sim_probs:wrapped_main"

 [build-system]
 requires = ["poetry-core>=1.0.0"]
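With the subset_sim_probs console script registered above, the coalescing pipeline from the earlier files in this diff can be driven from the command line. A hedged Python equivalent of one invocation (flag names come from the new args.py; the file paths are made up):

import sys
import deepdog.cli.subset_sim_probs

# Equivalent to: subset_sim_probs -d . --indexify-json indexes.json -o coalesced.csv --force-overwrite-outfile
sys.argv = [
    "subset_sim_probs",
    "--results-directory", ".",
    "--indexify-json", "indexes.json",
    "--outfile", "coalesced.csv",
    "--force-overwrite-outfile",
]
deepdog.cli.subset_sim_probs.wrapped_main()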
tests/direct_monte_carlo/test_config_filename.py (new file, 26 lines)
@@ -0,0 +1,26 @@
import re
import deepdog.direct_monte_carlo


def test_config_check_self():
    config = deepdog.direct_monte_carlo.DirectMonteCarloConfig(
        tag="test_tag",
        bayesrun_file_timestamp=False,
    )
    expected_filename = "test_tag.realdata.fast_filter.bayesrun.csv"
    actual_filename = config.get_filename()
    assert actual_filename == expected_filename
    regex = config.get_filename_regex()
    assert re.match(regex, actual_filename) is not None


def test_config_check_self_with_timestamp():
    config = deepdog.direct_monte_carlo.DirectMonteCarloConfig(
        tag="test_tag",
        bayesrun_file_timestamp=True,
    )
    expected_filename_ending = "test_tag.realdata.fast_filter.bayesrun.csv"
    actual_filename = config.get_filename()
    assert actual_filename.endswith(expected_filename_ending)
    regex = config.get_filename_regex()
    assert re.match(regex, actual_filename) is not None
@@ -10,3 +10,12 @@ def test_indexifier():
     _logger.debug(f"setting up indexifier {indexifier}")
     assert indexifier.indexify(0) == {"key_1": 1, "key_2": "a"}
     assert indexifier.indexify(5) == {"key_1": 2, "key_2": "c"}
+    assert len(indexifier) == 9
+
+
+def test_indexifier_length_short():
+    weight_dict = {"key_1": [1, 2, 3], "key_2": ["b", "c"]}
+    indexifier = deepdog.indexify.Indexifier(weight_dict)
+    _logger.debug(f"setting up indexifier {indexifier}")
+
+    assert len(indexifier) == 6
@@ -1,4 +1,4 @@
-import deepdog.results
+import deepdog.results.read_csv


 def test_parse_groupdict():
@@ -6,9 +6,9 @@ def test_parse_groupdict():
         "geom_-20_20_-10_10_0_5-orientation_free-dipole_count_100_success"
     )

-    parsed = deepdog.results._parse_bayesrun_column(example_column_name)
+    parsed = deepdog.results.read_csv._parse_bayesrun_column(example_column_name)
     assert parsed is not None
-    expected = deepdog.results.BayesrunColumnParsed(
+    expected = deepdog.results.read_csv.BayesrunColumnParsed(
         {
             "xmin": "-20",
             "xmax": "20",
@@ -29,9 +29,9 @@ def test_parse_groupdict_with_magnitude():
         "geom_-20_20_-10_10_0_5-magnitude_3.5-orientation_free-dipole_count_100_success"
     )

-    parsed = deepdog.results._parse_bayesrun_column(example_column_name)
+    parsed = deepdog.results.read_csv._parse_bayesrun_column(example_column_name)
     assert parsed is not None
-    expected = deepdog.results.BayesrunColumnParsed(
+    expected = deepdog.results.read_csv.BayesrunColumnParsed(
         {
             "xmin": "-20",
             "xmax": "20",
@@ -48,6 +48,28 @@ def test_parse_groupdict_with_magnitude():
     assert parsed == expected


+def test_parse_groupdict_with_negative_magnitude():
+    example_column_name = "geom_-20_20_-10_10_0_5-magnitude_-3.5-orientation_free-dipole_count_100_success"
+
+    parsed = deepdog.results.read_csv._parse_bayesrun_column(example_column_name)
+    assert parsed is not None
+    expected = deepdog.results.read_csv.BayesrunColumnParsed(
+        {
+            "xmin": "-20",
+            "xmax": "20",
+            "ymin": "-10",
+            "ymax": "10",
+            "zmin": "0",
+            "zmax": "5",
+            "orientation": "free",
+            "avg_filled": "100",
+            "log_magnitude": "-3.5",
+            "field_name": "success",
+        }
+    )
+    assert parsed == expected
+
+
 # def test_parse_no_match_column_name():
 #     parsed = deepdog.results.parse_bayesrun_column("There's nothing here")
 #     assert parsed is None
tests/results/test_parse_filename.py (new file, 19 lines)
@@ -0,0 +1,19 @@
import deepdog.results
import pytest


def test_parse_bayesrun_filename():
    valid1 = "20250226-204120-dot1-dot1-2-0.realdata.fast_filter.bayesrun.csv"

    timestamp, slug = deepdog.results._parse_string_output_filename(valid1)
    assert timestamp == "20250226-204120"
    assert slug == "dot1-dot1-2-0"

    valid2 = "dot1-dot1-2-0.realdata.fast_filter.bayesrun.csv"

    timestamp, slug = deepdog.results._parse_string_output_filename(valid2)
    assert timestamp is None
    assert slug == "dot1-dot1-2-0"

    with pytest.raises(ValueError):
        deepdog.results._parse_string_output_filename("not_a_valid_filename")