feat: Adds ability to parse bayesruns without timestamps
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
This commit is contained in:
parent
c8435b4b2a
commit
46f6b6cdf1
@ -19,6 +19,11 @@ FILENAME_REGEX = re.compile(
|
|||||||
r"(?P<timestamp>\d{8}-\d{6})-(?P<filename_slug>.*)\.realdata\.fast_filter\.bayesrun\.csv"
|
r"(?P<timestamp>\d{8}-\d{6})-(?P<filename_slug>.*)\.realdata\.fast_filter\.bayesrun\.csv"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# probably a better way but who cares
|
||||||
|
NO_TIMESTAMP_FILENAME_REGEX = re.compile(
|
||||||
|
r"(?P<filename_slug>.*)\.realdata\.fast_filter\.bayesrun\.csv"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
SUBSET_SIM_FILENAME_REGEX = re.compile(
|
SUBSET_SIM_FILENAME_REGEX = re.compile(
|
||||||
r"(?P<filename_slug>.*)-(?:no_adaptive_steps_)?(?P<num_ss_runs>\d+)-nc_(?P<n_c>\d+)-ns_(?P<n_s>\d+)-mmax_(?P<mmax>\d+)\.multi\.subsetsim\.csv"
|
r"(?P<filename_slug>.*)-(?:no_adaptive_steps_)?(?P<num_ss_runs>\d+)-nc_(?P<n_c>\d+)-ns_(?P<n_s>\d+)-mmax_(?P<mmax>\d+)\.multi\.subsetsim\.csv"
|
||||||
@ -46,15 +51,23 @@ class GeneralOutput:
|
|||||||
results: typing.Sequence[GeneralModelResult]
|
results: typing.Sequence[GeneralModelResult]
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_string_output_filename(
|
||||||
|
filename: str,
|
||||||
|
) -> typing.Tuple[typing.Optional[str], str]:
|
||||||
|
if match := FILENAME_REGEX.match(filename):
|
||||||
|
groups = match.groupdict()
|
||||||
|
return (groups["timestamp"], groups["filename_slug"])
|
||||||
|
elif match := NO_TIMESTAMP_FILENAME_REGEX.match(filename):
|
||||||
|
groups = match.groupdict()
|
||||||
|
return (None, groups["filename_slug"])
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Could not parse {filename} as a bayesrun output filename")
|
||||||
|
|
||||||
|
|
||||||
def _parse_output_filename(file: pathlib.Path) -> BayesrunOutputFilename:
|
def _parse_output_filename(file: pathlib.Path) -> BayesrunOutputFilename:
|
||||||
filename = file.name
|
filename = file.name
|
||||||
match = FILENAME_REGEX.match(filename)
|
timestamp, slug = _parse_string_output_filename(filename)
|
||||||
if not match:
|
return BayesrunOutputFilename(timestamp=timestamp, filename_slug=slug, path=file)
|
||||||
raise ValueError(f"{filename} was not a valid bayesrun output")
|
|
||||||
groups = match.groupdict()
|
|
||||||
return BayesrunOutputFilename(
|
|
||||||
timestamp=groups["timestamp"], filename_slug=groups["filename_slug"], path=file
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_ss_output_filename(file: pathlib.Path) -> BayesrunOutputFilename:
|
def _parse_ss_output_filename(file: pathlib.Path) -> BayesrunOutputFilename:
|
||||||
|
19
tests/results/test_parse_filename.py
Normal file
19
tests/results/test_parse_filename.py
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
import deepdog.results
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_bayesrun_filename():
|
||||||
|
valid1 = "20250226-204120-dot1-dot1-2-0.realdata.fast_filter.bayesrun.csv"
|
||||||
|
|
||||||
|
timestamp, slug = deepdog.results._parse_string_output_filename(valid1)
|
||||||
|
assert timestamp == "20250226-204120"
|
||||||
|
assert slug == "dot1-dot1-2-0"
|
||||||
|
|
||||||
|
valid2 = "dot1-dot1-2-0.realdata.fast_filter.bayesrun.csv"
|
||||||
|
|
||||||
|
timestamp, slug = deepdog.results._parse_string_output_filename(valid2)
|
||||||
|
assert timestamp is None
|
||||||
|
assert slug == "dot1-dot1-2-0"
|
||||||
|
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
deepdog.results._parse_string_output_filename("not_a_valid_filename")
|
Loading…
x
Reference in New Issue
Block a user