From b3bf4cde97461d8d621e258285ec21dc52891c75 Mon Sep 17 00:00:00 2001 From: Deepak Mallubhotla Date: Sat, 11 May 2024 20:49:45 -0500 Subject: [PATCH] perf: precompile the magic regexes for probs parsing --- deepdog/results/__init__.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/deepdog/results/__init__.py b/deepdog/results/__init__.py index ebe8755..d050574 100644 --- a/deepdog/results/__init__.py +++ b/deepdog/results/__init__.py @@ -8,20 +8,30 @@ import csv _logger = logging.getLogger(__name__) -FILENAME_REGEX = r"(?P\d{8}-\d{6})-(?P.*)\.realdata\.fast_filter\.bayesrun\.csv" +FILENAME_REGEX = re.compile( + r"(?P\d{8}-\d{6})-(?P.*)\.realdata\.fast_filter\.bayesrun\.csv" +) MODEL_REGEXES = [ - r"geom_(?P-?\d+)_(?P-?\d+)_(?P-?\d+)_(?P-?\d+)_(?P-?\d+)_(?P-?\d+)-orientation_(?Pfree|fixedxy|fixedz)-dipole_count_(?P\d+)_(?P\w*)", - r"geom_(?P-?\d+)_(?P-?\d+)_(?P-?\d+)_(?P-?\d+)_(?P-?\d+)_(?P-?\d+)-magnitude_(?P\d*\.?\d+)-orientation_(?Pfree|fixedxy|fixedz)-dipole_count_(?P\d+)_(?P\w*)", - r"geom_(?P-?\d*\.?\d+)_(?P-?\d*\.?\d+)_(?P-?\d*\.?\d+)_(?P-?\d*\.?\d+)_(?P-?\d*\.?\d+)_(?P-?\d*\.?\d+)-magnitude_(?P\d*\.?\d+)-orientation_(?Pfree|fixedxy|fixedz)-dipole_count_(?P\d+)_(?P\w*)" + re.compile(pattern) + for pattern in [ + r"geom_(?P-?\d+)_(?P-?\d+)_(?P-?\d+)_(?P-?\d+)_(?P-?\d+)_(?P-?\d+)-orientation_(?Pfree|fixedxy|fixedz)-dipole_count_(?P\d+)_(?P\w*)", + r"geom_(?P-?\d+)_(?P-?\d+)_(?P-?\d+)_(?P-?\d+)_(?P-?\d+)_(?P-?\d+)-magnitude_(?P\d*\.?\d+)-orientation_(?Pfree|fixedxy|fixedz)-dipole_count_(?P\d+)_(?P\w*)", + r"geom_(?P-?\d*\.?\d+)_(?P-?\d*\.?\d+)_(?P-?\d*\.?\d+)_(?P-?\d*\.?\d+)_(?P-?\d*\.?\d+)_(?P-?\d*\.?\d+)-magnitude_(?P\d*\.?\d+)-orientation_(?Pfree|fixedxy|fixedz)-dipole_count_(?P\d+)_(?P\w*)", + ] ] FILE_SLUG_REGEXES = [ - r"mock_tarucha-(?P\d+)", - r"(?:(?Pmock)_)?tarucha(?:_(?P\d+))?-(?P\d+)", - r"(?P\w+)-(?P\d+)", + re.compile(pattern) + for pattern in [ + r"(?P\w+)-(?P\d+)", + r"mock_tarucha-(?P\d+)", + r"(?:(?Pmock)_)?tarucha(?:_(?P\d+))?-(?P\d+)", + ] ] +SIMPLE_TAG_REGEX = re.compile(r"\w+-\d+") + @dataclasses.dataclass class BayesrunOutputFilename: @@ -86,7 +96,7 @@ def _parse_bayesrun_column( Returns the groupdict for the first match, or None if no match found. """ for pattern in MODEL_REGEXES: - match = re.match(pattern, column) + match = pattern.match(column) if match: return BayesrunColumnParsed(match.groupdict()) else: @@ -125,7 +135,7 @@ def _parse_bayesrun_row( def _parse_output_filename(file: pathlib.Path) -> BayesrunOutputFilename: filename = file.name - match = re.match(FILENAME_REGEX, filename) + match = FILENAME_REGEX.match(filename) if not match: raise ValueError(f"{filename} was not a valid bayesrun output") groups = match.groupdict() @@ -136,7 +146,7 @@ def _parse_output_filename(file: pathlib.Path) -> BayesrunOutputFilename: def _parse_file_slug(slug: str) -> typing.Optional[typing.Dict[str, str]]: for pattern in FILE_SLUG_REGEXES: - match = re.match(pattern, slug) + match = pattern.match(slug) if match: return match.groupdict() else: