Compare commits
6 Commits
aed66048b8
...
f33b5c7292
Author | SHA1 | Date | |
---|---|---|---|
f33b5c7292 | |||
71dc906a96 | |||
24c6e311c1 | |||
4dd3004a7b | |||
46f6b6cdf1 | |||
c8435b4b2a |
15
CHANGELOG.md
15
CHANGELOG.md
@ -2,6 +2,21 @@
|
||||
|
||||
All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
|
||||
|
||||
## [1.7.0](https://gitea.deepak.science:2222/physics/deepdog/compare/1.6.0...1.7.0) (2025-02-27)
|
||||
|
||||
|
||||
### Features
|
||||
|
||||
* adds configurable skip if file exists ([24c6e31](https://gitea.deepak.science:2222/physics/deepdog/commit/24c6e311c1d3067eb98cc60e6ca38d76373bf08e))
|
||||
|
||||
## [1.6.0](https://gitea.deepak.science:2222/physics/deepdog/compare/1.5.0...1.6.0) (2025-02-27)
|
||||
|
||||
|
||||
### Features
|
||||
|
||||
* Adds ability to parse bayesruns without timestamps ([46f6b6c](https://gitea.deepak.science:2222/physics/deepdog/commit/46f6b6cdf15c67aedf0c871d201b8db320bccbdf))
|
||||
* allows negative log magnitude strings in models ([c8435b4](https://gitea.deepak.science:2222/physics/deepdog/commit/c8435b4b2a6e4b89030f53b5734eb743e2003fb7))
|
||||
|
||||
## [1.5.0](https://gitea.deepak.science:2222/physics/deepdog/compare/1.4.0...1.5.0) (2024-12-30)
|
||||
|
||||
|
||||
|
@ -1,3 +1,5 @@
|
||||
import re
|
||||
import pathlib
|
||||
import csv
|
||||
import pdme.model
|
||||
import pdme.measurement
|
||||
@ -36,9 +38,35 @@ class DirectMonteCarloConfig:
|
||||
tag: str = ""
|
||||
cap_core_count: int = 0 # 0 means cap at num cores - 1
|
||||
chunk_size: int = 50
|
||||
# chunk size of some kind
|
||||
write_bayesrun_file: bool = True
|
||||
bayesrun_file_timestamp: bool = True
|
||||
# chunk size of some kind
|
||||
skip_if_exists: bool = False
|
||||
|
||||
def get_filename(self) -> str:
    """
    Build the name of the bayesrun CSV file for this configuration.

    The result is "<tag>.realdata.fast_filter.bayesrun.csv", prefixed with
    the current local time formatted as "YYYYmmdd-HHMMSS-" when
    bayesrun_file_timestamp is set.
    """
    # the clock is read on every call, so timestamped names may differ between calls
    now_str = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    prefix = f"{now_str}-" if self.bayesrun_file_timestamp else ""

    filename = f"{prefix}{self.tag}.realdata.fast_filter.bayesrun.csv"
    _logger.debug(f"Got filename {filename}")
    return filename
|
||||
|
||||
def get_filename_regex(self) -> str:
    """
    Build a regex matching the filenames get_filename can produce for this tag.

    The pattern accepts the bare "<tag>.realdata.fast_filter.bayesrun.csv"
    name as well as the same name with a "YYYYmmdd-HHMMSS-" prefix. When the
    prefix is present the timestamp (without its trailing hyphen) is captured
    in the "timestamp" group; otherwise that group is None.

    The pattern carries no anchors of its own, so use it with re.match to
    anchor it at the start of the filename.
    """
    # The tag is written literally into filenames, so characters such as "."
    # or "+" must not be interpreted as regex operators here.
    escaped_tag = re.escape(self.tag)

    # The hyphen only ever appears as part of the timestamp prefix, so the two
    # are made optional together rather than independently.
    return rf"(?:(?P<timestamp>\d{{8}}-\d{{6}})-)?{escaped_tag}\.realdata\.fast_filter\.bayesrun\.csv"
|
||||
|
||||
|
||||
# Aliasing dict as a generic data container
|
||||
@ -230,8 +258,27 @@ class DirectMonteCarloRun:
|
||||
|
||||
def execute(self) -> Sequence[DirectMonteCarloResult]:
|
||||
|
||||
# set starting execution timestamp
|
||||
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
|
||||
filename = self.config.get_filename()
|
||||
if self.config.skip_if_exists:
|
||||
_logger.info(f"Checking if {filename} exists")
|
||||
cwd = pathlib.Path.cwd()
|
||||
if (cwd / filename).exists():
|
||||
_logger.info(f"File {filename} exists, skipping")
|
||||
return []
|
||||
if self.config.bayesrun_file_timestamp:
|
||||
_logger.info(
|
||||
"Also need to check file endings because of possible past or current timestamps, check only occurs if writing timestamp is set"
|
||||
)
|
||||
pattern = self.config.get_filename_regex()
|
||||
for file in cwd.iterdir():
|
||||
match = re.match(pattern, file.name)
|
||||
if match is not None:
|
||||
_logger.info(f"Matched {file.name} to {pattern}")
|
||||
_logger.info(f"File {filename} exists, skipping")
|
||||
return []
|
||||
_logger.info(
|
||||
f"Finished checking against pattern {pattern}, hopefully didn't take too long!"
|
||||
)
|
||||
|
||||
count_per_step = (
|
||||
self.config.monte_carlo_count_per_cycle * self.config.monte_carlo_cycles
|
||||
@ -349,14 +396,6 @@ class DirectMonteCarloRun:
|
||||
|
||||
if self.config.write_bayesrun_file:
|
||||
|
||||
if self.config.bayesrun_file_timestamp:
|
||||
timestamp_str = f"{timestamp}-"
|
||||
else:
|
||||
timestamp_str = ""
|
||||
filename = (
|
||||
f"{timestamp_str}{self.config.tag}.realdata.fast_filter.bayesrun.csv"
|
||||
)
|
||||
|
||||
_logger.info(f"Going to write to file [{filename}]")
|
||||
# row: Dict[str, Union[int, float, str]] = {}
|
||||
row = {}
|
||||
|
@ -19,6 +19,11 @@ FILENAME_REGEX = re.compile(
|
||||
r"(?P<timestamp>\d{8}-\d{6})-(?P<filename_slug>.*)\.realdata\.fast_filter\.bayesrun\.csv"
|
||||
)
|
||||
|
||||
# Fallback pattern for bayesrun outputs written without a timestamp prefix:
# everything before the fixed suffix is captured as the slug.
_NO_TIMESTAMP_PATTERN = r"(?P<filename_slug>.*)\.realdata\.fast_filter\.bayesrun\.csv"
NO_TIMESTAMP_FILENAME_REGEX = re.compile(_NO_TIMESTAMP_PATTERN)
|
||||
|
||||
|
||||
SUBSET_SIM_FILENAME_REGEX = re.compile(
|
||||
r"(?P<filename_slug>.*)-(?:no_adaptive_steps_)?(?P<num_ss_runs>\d+)-nc_(?P<n_c>\d+)-ns_(?P<n_s>\d+)-mmax_(?P<mmax>\d+)\.multi\.subsetsim\.csv"
|
||||
@ -46,15 +51,23 @@ class GeneralOutput:
|
||||
results: typing.Sequence[GeneralModelResult]
|
||||
|
||||
|
||||
def _parse_string_output_filename(
    filename: str,
) -> typing.Tuple[typing.Optional[str], str]:
    """
    Split a bayesrun output filename into its (timestamp, slug) parts.

    The timestamped pattern is tried first; if it does not match, the
    timestamp-free pattern is tried and the timestamp is returned as None.

    Raises ValueError when neither pattern matches.
    """
    stamped = FILENAME_REGEX.match(filename)
    if stamped:
        return (stamped.group("timestamp"), stamped.group("filename_slug"))

    unstamped = NO_TIMESTAMP_FILENAME_REGEX.match(filename)
    if unstamped:
        return (None, unstamped.group("filename_slug"))

    raise ValueError(f"Could not parse {filename} as a bayesrun output filename")
|
||||
|
||||
|
||||
def _parse_output_filename(file: pathlib.Path) -> BayesrunOutputFilename:
    """
    Parse the name of a bayesrun output file into a BayesrunOutputFilename.

    Parsing is delegated to _parse_string_output_filename, so names without a
    timestamp prefix are accepted and yield timestamp=None.

    Raises ValueError (from the string parser) when the name is not a
    bayesrun output filename.
    """
    # Only the final path component carries the timestamp/slug information.
    timestamp, slug = _parse_string_output_filename(file.name)
    return BayesrunOutputFilename(timestamp=timestamp, filename_slug=slug, path=file)
|
||||
|
||||
|
||||
def _parse_ss_output_filename(file: pathlib.Path) -> BayesrunOutputFilename:
|
||||
|
@ -8,6 +8,8 @@ MODEL_REGEXES = [
|
||||
r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
|
||||
r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
|
||||
r"geom_(?P<xmin>-?\d*\.?\d+)_(?P<xmax>-?\d*\.?\d+)_(?P<ymin>-?\d*\.?\d+)_(?P<ymax>-?\d*\.?\d+)_(?P<zmin>-?\d*\.?\d+)_(?P<zmax>-?\d*\.?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
|
||||
r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-magnitude_(?P<log_magnitude>-?\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
|
||||
r"geom_(?P<xmin>-?\d*\.?\d+)_(?P<xmax>-?\d*\.?\d+)_(?P<ymin>-?\d*\.?\d+)_(?P<ymax>-?\d*\.?\d+)_(?P<zmin>-?\d*\.?\d+)_(?P<zmax>-?\d*\.?\d+)-magnitude_(?P<log_magnitude>-?\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
|
||||
]
|
||||
]
|
||||
|
||||
|
@ -1,13 +1,13 @@
|
||||
[tool.poetry]
|
||||
name = "deepdog"
|
||||
version = "1.5.0"
|
||||
version = "1.7.0"
|
||||
description = ""
|
||||
authors = ["Deepak Mallubhotla <dmallubhotla+github@gmail.com>"]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.8.1,<3.10"
|
||||
pdme = "^1.5.0"
|
||||
numpy = "2.2.3"
|
||||
numpy = "2.2.4"
|
||||
scipy = "1.10"
|
||||
tqdm = "^4.66.2"
|
||||
|
||||
|
26
tests/direct_monte_carlo/test_config_filename.py
Normal file
26
tests/direct_monte_carlo/test_config_filename.py
Normal file
@ -0,0 +1,26 @@
|
||||
import re
|
||||
import deepdog.direct_monte_carlo
|
||||
|
||||
|
||||
def test_config_check_self():
    """Without a timestamp the filename is exactly the bare name, and the config's own regex accepts it."""
    cfg = deepdog.direct_monte_carlo.DirectMonteCarloConfig(
        tag="test_tag", bayesrun_file_timestamp=False
    )

    produced = cfg.get_filename()
    assert produced == "test_tag.realdata.fast_filter.bayesrun.csv"

    own_pattern = cfg.get_filename_regex()
    assert re.match(own_pattern, produced) is not None
|
||||
|
||||
|
||||
def test_config_check_self_with_timestamp():
    """With a timestamp the filename ends in the bare name, and the config's own regex accepts it."""
    cfg = deepdog.direct_monte_carlo.DirectMonteCarloConfig(
        tag="test_tag", bayesrun_file_timestamp=True
    )

    produced = cfg.get_filename()
    # the prefix depends on the clock, so only the fixed ending is checked
    assert produced.endswith("test_tag.realdata.fast_filter.bayesrun.csv")

    own_pattern = cfg.get_filename_regex()
    assert re.match(own_pattern, produced) is not None
|
@ -48,6 +48,28 @@ def test_parse_groupdict_with_magnitude():
|
||||
assert parsed == expected
|
||||
|
||||
|
||||
def test_parse_groupdict_with_negative_magnitude():
    """A column name carrying a negative log magnitude parses with the sign preserved."""
    column = "geom_-20_20_-10_10_0_5-magnitude_-3.5-orientation_free-dipole_count_100_success"

    result = deepdog.results.read_csv._parse_bayesrun_column(column)
    assert result is not None

    wanted_fields = {
        "xmin": "-20",
        "xmax": "20",
        "ymin": "-10",
        "ymax": "10",
        "zmin": "0",
        "zmax": "5",
        "orientation": "free",
        "avg_filled": "100",
        "log_magnitude": "-3.5",
        "field_name": "success",
    }
    wanted = deepdog.results.read_csv.BayesrunColumnParsed(wanted_fields)
    assert result == wanted
|
||||
|
||||
|
||||
# def test_parse_no_match_column_name():
|
||||
# parsed = deepdog.results.parse_bayesrun_column("There's nothing here")
|
||||
# assert parsed is None
|
||||
|
19
tests/results/test_parse_filename.py
Normal file
19
tests/results/test_parse_filename.py
Normal file
@ -0,0 +1,19 @@
|
||||
import deepdog.results
|
||||
import pytest
|
||||
|
||||
|
||||
def test_parse_bayesrun_filename():
    """Filenames parse with and without a timestamp prefix; anything else raises ValueError."""
    parse = deepdog.results._parse_string_output_filename

    # timestamped name: both parts are recovered
    ts, name_slug = parse(
        "20250226-204120-dot1-dot1-2-0.realdata.fast_filter.bayesrun.csv"
    )
    assert ts == "20250226-204120"
    assert name_slug == "dot1-dot1-2-0"

    # same slug without the prefix: the timestamp comes back as None
    ts, name_slug = parse("dot1-dot1-2-0.realdata.fast_filter.bayesrun.csv")
    assert ts is None
    assert name_slug == "dot1-dot1-2-0"

    with pytest.raises(ValueError):
        parse("not_a_valid_filename")
|
Loading…
x
Reference in New Issue
Block a user