From 46f6b6cdf15c67aedf0c871d201b8db320bccbdf Mon Sep 17 00:00:00 2001 From: Deepak Mallubhotla Date: Wed, 26 Feb 2025 21:01:19 -0600 Subject: [PATCH] feat: Adds ability to parse bayesruns without timestamps --- deepdog/results/__init__.py | 27 ++++++++++++++++++++------- tests/results/test_parse_filename.py | 19 +++++++++++++++++++ 2 files changed, 39 insertions(+), 7 deletions(-) create mode 100644 tests/results/test_parse_filename.py diff --git a/deepdog/results/__init__.py b/deepdog/results/__init__.py index a931aca..e697085 100644 --- a/deepdog/results/__init__.py +++ b/deepdog/results/__init__.py @@ -19,6 +19,11 @@ FILENAME_REGEX = re.compile( r"(?P<timestamp>\d{8}-\d{6})-(?P<filename_slug>.*)\.realdata\.fast_filter\.bayesrun\.csv" ) +# probably a better way but who cares +NO_TIMESTAMP_FILENAME_REGEX = re.compile( + r"(?P<filename_slug>.*)\.realdata\.fast_filter\.bayesrun\.csv" +) + SUBSET_SIM_FILENAME_REGEX = re.compile( r"(?P.*)-(?:no_adaptive_steps_)?(?P\d+)-nc_(?P\d+)-ns_(?P\d+)-mmax_(?P\d+)\.multi\.subsetsim\.csv" @@ -46,15 +51,23 @@ class GeneralOutput: results: typing.Sequence[GeneralModelResult] +def _parse_string_output_filename( + filename: str, +) -> typing.Tuple[typing.Optional[str], str]: + if match := FILENAME_REGEX.match(filename): + groups = match.groupdict() + return (groups["timestamp"], groups["filename_slug"]) + elif match := NO_TIMESTAMP_FILENAME_REGEX.match(filename): + groups = match.groupdict() + return (None, groups["filename_slug"]) + else: + raise ValueError(f"Could not parse {filename} as a bayesrun output filename") + + def _parse_output_filename(file: pathlib.Path) -> BayesrunOutputFilename: filename = file.name - match = FILENAME_REGEX.match(filename) - if not match: - raise ValueError(f"{filename} was not a valid bayesrun output") - groups = match.groupdict() - return BayesrunOutputFilename( - timestamp=groups["timestamp"], filename_slug=groups["filename_slug"], path=file - ) + timestamp, slug = _parse_string_output_filename(filename) + return
BayesrunOutputFilename(timestamp=timestamp, filename_slug=slug, path=file) def _parse_ss_output_filename(file: pathlib.Path) -> BayesrunOutputFilename: diff --git a/tests/results/test_parse_filename.py b/tests/results/test_parse_filename.py new file mode 100644 index 0000000..04c0c52 --- /dev/null +++ b/tests/results/test_parse_filename.py @@ -0,0 +1,19 @@ +import deepdog.results +import pytest + + +def test_parse_bayesrun_filename(): + valid1 = "20250226-204120-dot1-dot1-2-0.realdata.fast_filter.bayesrun.csv" + + timestamp, slug = deepdog.results._parse_string_output_filename(valid1) + assert timestamp == "20250226-204120" + assert slug == "dot1-dot1-2-0" + + valid2 = "dot1-dot1-2-0.realdata.fast_filter.bayesrun.csv" + + timestamp, slug = deepdog.results._parse_string_output_filename(valid2) + assert timestamp is None + assert slug == "dot1-dot1-2-0" + + with pytest.raises(ValueError): + deepdog.results._parse_string_output_filename("not_a_valid_filename")