Compare commits

...

3 Commits
1.2.0 ... 1.2.1

Author SHA1 Message Date
d258cfbec7 chore(release): 1.2.1
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
gitea-physics/deepdog/pipeline/tag This commit looks good
2024-05-11 20:51:05 -05:00
b3bf4cde97 perf: precompile the magic regexes for probs parsing 2024-05-11 20:49:45 -05:00
60f29b0b2f perf: avoid recalculating product dict in indexifier to improve performance for probs 2024-05-11 20:49:26 -05:00
4 changed files with 25 additions and 13 deletions

View File

@@ -2,6 +2,8 @@
All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines. All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
### [1.2.1](https://gitea.deepak.science:2222/physics/deepdog/compare/1.2.0...1.2.1) (2024-05-12)
## [1.2.0](https://gitea.deepak.science:2222/physics/deepdog/compare/1.1.0...1.2.0) (2024-05-09) ## [1.2.0](https://gitea.deepak.science:2222/physics/deepdog/compare/1.1.0...1.2.0) (2024-05-09)

View File

@@ -31,10 +31,10 @@ class Indexifier:
def __init__(self, list_dict: typing.Dict[str, typing.Sequence]): def __init__(self, list_dict: typing.Dict[str, typing.Sequence]):
self.dict = list_dict self.dict = list_dict
self.product_dict = _dict_product(self.dict)
def indexify(self, n: int) -> typing.Dict[str, typing.Any]: def indexify(self, n: int) -> typing.Dict[str, typing.Any]:
product_dict = _dict_product(self.dict) return self.product_dict[n]
return product_dict[n]
def _indexify_indices(self, n: int) -> typing.Sequence[int]: def _indexify_indices(self, n: int) -> typing.Sequence[int]:
""" """

View File

@@ -8,20 +8,30 @@ import csv
_logger = logging.getLogger(__name__) _logger = logging.getLogger(__name__)
FILENAME_REGEX = r"(?P<timestamp>\d{8}-\d{6})-(?P<filename_slug>.*)\.realdata\.fast_filter\.bayesrun\.csv" FILENAME_REGEX = re.compile(
r"(?P<timestamp>\d{8}-\d{6})-(?P<filename_slug>.*)\.realdata\.fast_filter\.bayesrun\.csv"
)
MODEL_REGEXES = [ MODEL_REGEXES = [
r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)", re.compile(pattern)
r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)", for pattern in [
r"geom_(?P<xmin>-?\d*\.?\d+)_(?P<xmax>-?\d*\.?\d+)_(?P<ymin>-?\d*\.?\d+)_(?P<ymax>-?\d*\.?\d+)_(?P<zmin>-?\d*\.?\d+)_(?P<zmax>-?\d*\.?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)" r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
r"geom_(?P<xmin>-?\d*\.?\d+)_(?P<xmax>-?\d*\.?\d+)_(?P<ymin>-?\d*\.?\d+)_(?P<ymax>-?\d*\.?\d+)_(?P<zmin>-?\d*\.?\d+)_(?P<zmax>-?\d*\.?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
]
] ]
FILE_SLUG_REGEXES = [ FILE_SLUG_REGEXES = [
r"mock_tarucha-(?P<job_index>\d+)", re.compile(pattern)
r"(?:(?P<mock>mock)_)?tarucha(?:_(?P<tarucha_run_id>\d+))?-(?P<job_index>\d+)", for pattern in [
r"(?P<tag>\w+)-(?P<job_index>\d+)", r"(?P<tag>\w+)-(?P<job_index>\d+)",
r"mock_tarucha-(?P<job_index>\d+)",
r"(?:(?P<mock>mock)_)?tarucha(?:_(?P<tarucha_run_id>\d+))?-(?P<job_index>\d+)",
]
] ]
SIMPLE_TAG_REGEX = re.compile(r"\w+-\d+")
@dataclasses.dataclass @dataclasses.dataclass
class BayesrunOutputFilename: class BayesrunOutputFilename:
@@ -86,7 +96,7 @@ def _parse_bayesrun_column(
Returns the groupdict for the first match, or None if no match found. Returns the groupdict for the first match, or None if no match found.
""" """
for pattern in MODEL_REGEXES: for pattern in MODEL_REGEXES:
match = re.match(pattern, column) match = pattern.match(column)
if match: if match:
return BayesrunColumnParsed(match.groupdict()) return BayesrunColumnParsed(match.groupdict())
else: else:
@@ -125,7 +135,7 @@ def _parse_bayesrun_row(
def _parse_output_filename(file: pathlib.Path) -> BayesrunOutputFilename: def _parse_output_filename(file: pathlib.Path) -> BayesrunOutputFilename:
filename = file.name filename = file.name
match = re.match(FILENAME_REGEX, filename) match = FILENAME_REGEX.match(filename)
if not match: if not match:
raise ValueError(f"{filename} was not a valid bayesrun output") raise ValueError(f"{filename} was not a valid bayesrun output")
groups = match.groupdict() groups = match.groupdict()
@@ -136,7 +146,7 @@ def _parse_output_filename(file: pathlib.Path) -> BayesrunOutputFilename:
def _parse_file_slug(slug: str) -> typing.Optional[typing.Dict[str, str]]: def _parse_file_slug(slug: str) -> typing.Optional[typing.Dict[str, str]]:
for pattern in FILE_SLUG_REGEXES: for pattern in FILE_SLUG_REGEXES:
match = re.match(pattern, slug) match = pattern.match(slug)
if match: if match:
return match.groupdict() return match.groupdict()
else: else:

View File

@@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "deepdog" name = "deepdog"
version = "1.2.0" version = "1.2.1"
description = "" description = ""
authors = ["Deepak Mallubhotla <dmallubhotla+github@gmail.com>"] authors = ["Deepak Mallubhotla <dmallubhotla+github@gmail.com>"]