Compare commits

..

6 Commits
1.1.0 ... 1.2.1

Author SHA1 Message Date
d258cfbec7 chore(release): 1.2.1
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
gitea-physics/deepdog/pipeline/tag This commit looks good
2024-05-11 20:51:05 -05:00
b3bf4cde97 perf: precompile the magic regexes for probs parsing 2024-05-11 20:49:45 -05:00
60f29b0b2f perf: avoid recalculating product dict in indexifier to improve performance for probs 2024-05-11 20:49:26 -05:00
093a3fb5c4 chore(release): 1.2.0
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
gitea-physics/deepdog/pipeline/tag This commit looks good
2024-05-08 22:24:28 -05:00
dc1d2d45a3 feat: adds additional matching regexes
All checks were successful
gitea-physics/deepdog/pipeline/head This commit looks good
2024-05-08 22:23:57 -05:00
f0e2fa3da9 feat: adds magnitude enabled parsing option 2024-05-03 10:44:06 -05:00
5 changed files with 69 additions and 11 deletions

View File

@@ -2,6 +2,16 @@
All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
### [1.2.1](https://gitea.deepak.science:2222/physics/deepdog/compare/1.2.0...1.2.1) (2024-05-12)
## [1.2.0](https://gitea.deepak.science:2222/physics/deepdog/compare/1.1.0...1.2.0) (2024-05-09)
### Features
* adds additional matching regexes ([dc1d2d4](https://gitea.deepak.science:2222/physics/deepdog/commit/dc1d2d45a3e631c5efccce80f8a24fa87c6089e0))
* adds magnitude enabled parsing option ([f0e2fa3](https://gitea.deepak.science:2222/physics/deepdog/commit/f0e2fa3da9f5a5136908d691137a904fda4e3a9a))
## [1.1.0](https://gitea.deepak.science:2222/physics/deepdog/compare/1.0.1...1.1.0) (2024-05-03)

View File

@@ -31,10 +31,10 @@ class Indexifier:
def __init__(self, list_dict: typing.Dict[str, typing.Sequence]):
    """
    keeps the input mapping and stores the result of _dict_product for it

    :param list_dict: mapping from a string key to a sequence of values
    """
    self.dict = list_dict
    # Computed a single time here so that indexify() only has to index into it.
    self.product_dict = _dict_product(self.dict)
def indexify(self, n: int) -> typing.Dict[str, typing.Any]:
    """
    returns entry n of the product stored on the instance as self.product_dict

    :param n: index into self.product_dict
    :return: the element of self.product_dict at position n
    """
    # Plain lookup: the product is not rebuilt on every call.
    return self.product_dict[n]
def _indexify_indices(self, n: int) -> typing.Sequence[int]: def _indexify_indices(self, n: int) -> typing.Sequence[int]:
""" """

View File

@@ -8,17 +8,30 @@ import csv
_logger = logging.getLogger(__name__) _logger = logging.getLogger(__name__)
# All patterns below are compiled once at import time; callers use the
# compiled objects' .match() directly.

# Output file name: "<timestamp>-<filename_slug>.realdata.fast_filter.bayesrun.csv",
# where the timestamp is 8 digits, a dash, then 6 digits.
FILENAME_REGEX = re.compile(
    r"(?P<timestamp>\d{8}-\d{6})-(?P<filename_slug>.*)\.realdata\.fast_filter\.bayesrun\.csv"
)

# Column-name patterns, in the order they are tried:
#   1. integer geometry bounds, no magnitude segment
#   2. integer geometry bounds with a "-magnitude_<value>" segment
#   3. geometry bounds that may contain a decimal point, with a magnitude segment
MODEL_REGEXES = [
    re.compile(pattern)
    for pattern in [
        r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
        r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
        r"geom_(?P<xmin>-?\d*\.?\d+)_(?P<xmax>-?\d*\.?\d+)_(?P<ymin>-?\d*\.?\d+)_(?P<ymax>-?\d*\.?\d+)_(?P<zmin>-?\d*\.?\d+)_(?P<zmax>-?\d*\.?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
    ]
]

# File-slug patterns, in the order they are tried.
# NOTE(review): the generic "<tag>-<job_index>" pattern comes first, and \w+
# also matches "mock_tarucha" and "tarucha_<id>", so with first-match lookup
# the two tarucha patterns below never win for a slug they would match (their
# "mock" / "tarucha_run_id" groups are never returned). Confirm this
# precedence is intended before reordering; changing it alters the keys
# callers receive.
FILE_SLUG_REGEXES = [
    re.compile(pattern)
    for pattern in [
        r"(?P<tag>\w+)-(?P<job_index>\d+)",
        r"mock_tarucha-(?P<job_index>\d+)",
        r"(?:(?P<mock>mock)_)?tarucha(?:_(?P<tarucha_run_id>\d+))?-(?P<job_index>\d+)",
    ]
]

# Word characters, a dash, then digits (no capture groups).
SIMPLE_TAG_REGEX = re.compile(r"\w+-\d+")
@dataclasses.dataclass @dataclasses.dataclass
class BayesrunOutputFilename: class BayesrunOutputFilename:
@@ -27,7 +40,6 @@ class BayesrunOutputFilename:
path: pathlib.Path path: pathlib.Path
@dataclasses.dataclass
class BayesrunColumnParsed: class BayesrunColumnParsed:
""" """
class for parsing a bayesrun while pulling certain special fields out class for parsing a bayesrun while pulling certain special fields out
@@ -38,10 +50,21 @@ class BayesrunColumnParsed:
self.model_field_dict = { self.model_field_dict = {
k: v for k, v in groupdict.items() if k != "field_name" k: v for k, v in groupdict.items() if k != "field_name"
} }
self._groupdict_str = repr(groupdict)
def __str__(self): def __str__(self):
return f"BayesrunColumnParsed[{self.column_field}: {self.model_field_dict}]" return f"BayesrunColumnParsed[{self.column_field}: {self.model_field_dict}]"
def __repr__(self):
return f"BayesrunColumnParsed({self._groupdict_str})"
def __eq__(self, other):
if isinstance(other, BayesrunColumnParsed):
return (self.column_field == other.column_field) and (
self.model_field_dict == other.model_field_dict
)
return NotImplemented
@dataclasses.dataclass @dataclasses.dataclass
class BayesrunModelResult: class BayesrunModelResult:
@@ -73,7 +96,7 @@ def _parse_bayesrun_column(
Returns the groupdict for the first match, or None if no match found. Returns the groupdict for the first match, or None if no match found.
""" """
for pattern in MODEL_REGEXES: for pattern in MODEL_REGEXES:
match = re.match(pattern, column) match = pattern.match(column)
if match: if match:
return BayesrunColumnParsed(match.groupdict()) return BayesrunColumnParsed(match.groupdict())
else: else:
@@ -112,7 +135,7 @@ def _parse_bayesrun_row(
def _parse_output_filename(file: pathlib.Path) -> BayesrunOutputFilename: def _parse_output_filename(file: pathlib.Path) -> BayesrunOutputFilename:
filename = file.name filename = file.name
match = re.match(FILENAME_REGEX, filename) match = FILENAME_REGEX.match(filename)
if not match: if not match:
raise ValueError(f"{filename} was not a valid bayesrun output") raise ValueError(f"{filename} was not a valid bayesrun output")
groups = match.groupdict() groups = match.groupdict()
@@ -123,7 +146,7 @@ def _parse_output_filename(file: pathlib.Path) -> BayesrunOutputFilename:
def _parse_file_slug(slug: str) -> typing.Optional[typing.Dict[str, str]]: def _parse_file_slug(slug: str) -> typing.Optional[typing.Dict[str, str]]:
for pattern in FILE_SLUG_REGEXES: for pattern in FILE_SLUG_REGEXES:
match = re.match(pattern, slug) match = pattern.match(slug)
if match: if match:
return match.groupdict() return match.groupdict()
else: else:

View File

@@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "deepdog" name = "deepdog"
version = "1.1.0" version = "1.2.1"
description = "" description = ""
authors = ["Deepak Mallubhotla <dmallubhotla+github@gmail.com>"] authors = ["Deepak Mallubhotla <dmallubhotla+github@gmail.com>"]

View File

@@ -7,6 +7,7 @@ def test_parse_groupdict():
) )
parsed = deepdog.results._parse_bayesrun_column(example_column_name) parsed = deepdog.results._parse_bayesrun_column(example_column_name)
assert parsed is not None
expected = deepdog.results.BayesrunColumnParsed( expected = deepdog.results.BayesrunColumnParsed(
{ {
"xmin": "-20", "xmin": "-20",
@@ -23,6 +24,30 @@ def test_parse_groupdict():
assert parsed == expected assert parsed == expected
def test_parse_groupdict_with_magnitude():
    # Column name that carries a "-magnitude_<value>" segment between the
    # geometry bounds and the orientation.
    example_column_name = (
        "geom_-20_20_-10_10_0_5-magnitude_3.5-orientation_free-dipole_count_100_success"
    )

    parsed = deepdog.results._parse_bayesrun_column(example_column_name)
    # Fail with a clear message if nothing matched, before comparing.
    assert parsed is not None

    # Every captured group is expected as the raw string from the column name.
    expected_groups = {
        "xmin": "-20",
        "xmax": "20",
        "ymin": "-10",
        "ymax": "10",
        "zmin": "0",
        "zmax": "5",
        "orientation": "free",
        "avg_filled": "100",
        "log_magnitude": "3.5",
        "field_name": "success",
    }
    expected = deepdog.results.BayesrunColumnParsed(expected_groups)
    assert parsed == expected
# def test_parse_no_match_column_name(): # def test_parse_no_match_column_name():
# parsed = deepdog.results.parse_bayesrun_column("There's nothing here") # parsed = deepdog.results.parse_bayesrun_column("There's nothing here")
# assert parsed is None # assert parsed is None