chore(release): 1.2.1

perf: precompile the magic regexes for probs parsing
perf: avoid recalculating product dict in indexifier to improve performance for probs
2024-05-11 20:51:05 -05:00 · 2024-05-11 20:49:45 -05:00 · 2024-05-11 20:49:26 -05:00 · 2024-05-08 22:24:28 -05:00 · 2024-05-08 22:23:57 -05:00 · 2024-05-03 10:44:06 -05:00
5 changed files with 69 additions and 11 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,16 @@
 All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
 ### [1.2.1](https://gitea.deepak.science:2222/physics/deepdog/compare/1.2.0...1.2.1) (2024-05-12)
 ## [1.2.0](https://gitea.deepak.science:2222/physics/deepdog/compare/1.1.0...1.2.0) (2024-05-09)
 ### Features
 * adds additional matching regexes ([dc1d2d4](https://gitea.deepak.science:2222/physics/deepdog/commit/dc1d2d45a3e631c5efccce80f8a24fa87c6089e0))
 * adds magnitude enabled parsing option ([f0e2fa3](https://gitea.deepak.science:2222/physics/deepdog/commit/f0e2fa3da9f5a5136908d691137a904fda4e3a9a))
 ## [1.1.0](https://gitea.deepak.science:2222/physics/deepdog/compare/1.0.1...1.1.0) (2024-05-03)
--- a/deepdog/indexify/init.py
+++ b/deepdog/indexify/init.py
@@ -31,10 +31,10 @@ class Indexifier:
 	def __init__(self, list_dict: typing.Dict[str, typing.Sequence]):
 		self.dict = list_dict
+		self.product_dict = _dict_product(self.dict)
 	def indexify(self, n: int) -> typing.Dict[str, typing.Any]:
-		product_dict = _dict_product(self.dict)
+		return self.product_dict[n]
-		return product_dict[n]
 	def _indexify_indices(self, n: int) -> typing.Sequence[int]:
 		"""
--- a/deepdog/results/init.py
+++ b/deepdog/results/init.py
@@ -8,17 +8,30 @@ import csv
 _logger = logging.getLogger(__name__)
-FILENAME_REGEX = r"(?P<timestamp>\d{8}-\d{6})-(?P<filename_slug>.*)\.realdata\.fast_filter\.bayesrun\.csv"
+FILENAME_REGEX = re.compile(
+	r"(?P<timestamp>\d{8}-\d{6})-(?P<filename_slug>.*)\.realdata\.fast_filter\.bayesrun\.csv"
+)
 MODEL_REGEXES = [
-	r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)"
+	re.compile(pattern)
+	for pattern in [
+		r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
+		r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
+		r"geom_(?P<xmin>-?\d*\.?\d+)_(?P<xmax>-?\d*\.?\d+)_(?P<ymin>-?\d*\.?\d+)_(?P<ymax>-?\d*\.?\d+)_(?P<zmin>-?\d*\.?\d+)_(?P<zmax>-?\d*\.?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
+	]
 ]
 FILE_SLUG_REGEXES = [
 	re.compile(pattern)
 	for pattern in [
 		r"(?P<tag>\w+)-(?P<job_index>\d+)",
 		r"mock_tarucha-(?P<job_index>\d+)",
 		r"(?:(?P<mock>mock)_)?tarucha(?:_(?P<tarucha_run_id>\d+))?-(?P<job_index>\d+)",
 	]
 ]
 SIMPLE_TAG_REGEX = re.compile(r"\w+-\d+")
@dataclasses.dataclass
 class BayesrunOutputFilename:
@@ -27,7 +40,6 @@ class BayesrunOutputFilename:
 	path: pathlib.Path
@dataclasses.dataclass
 class BayesrunColumnParsed:
 	"""
 	class for parsing a bayesrun while pulling certain special fields out
@@ -38,10 +50,21 @@ class BayesrunColumnParsed:
 		self.model_field_dict = {
 			k: v for k, v in groupdict.items() if k != "field_name"
 		}
 		self._groupdict_str = repr(groupdict)
 	def __str__(self):
 		return f"BayesrunColumnParsed[{self.column_field}: {self.model_field_dict}]"
 	def __repr__(self):
 		return f"BayesrunColumnParsed({self._groupdict_str})"
 	def __eq__(self, other):
 		if isinstance(other, BayesrunColumnParsed):
 			return (self.column_field == other.column_field) and (
 				self.model_field_dict == other.model_field_dict
 			)
 		return NotImplemented
@dataclasses.dataclass
 class BayesrunModelResult:
@@ -73,7 +96,7 @@ def _parse_bayesrun_column(
 	Returns the groupdict for the first match, or None if no match found.
 	"""
 	for pattern in MODEL_REGEXES:
-		match = re.match(pattern, column)
+		match = pattern.match(column)
 		if match:
 			return BayesrunColumnParsed(match.groupdict())
 	else:
@@ -112,7 +135,7 @@ def _parse_bayesrun_row(
 def _parse_output_filename(file: pathlib.Path) -> BayesrunOutputFilename:
 	filename = file.name
-	match = re.match(FILENAME_REGEX, filename)
+	match = FILENAME_REGEX.match(filename)
 	if not match:
 		raise ValueError(f"{filename} was not a valid bayesrun output")
 	groups = match.groupdict()
@@ -123,7 +146,7 @@ def _parse_output_filename(file: pathlib.Path) -> BayesrunOutputFilename:
 def _parse_file_slug(slug: str) -> typing.Optional[typing.Dict[str, str]]:
 	for pattern in FILE_SLUG_REGEXES:
-		match = re.match(pattern, slug)
+		match = pattern.match(slug)
 		if match:
 			return match.groupdict()
 	else:
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "deepdog"
-version = "1.1.0"
+version = "1.2.1"
 description = ""
 authors = ["Deepak Mallubhotla <dmallubhotla+github@gmail.com>"]
--- a/tests/results/test_column_results.py
+++ b/tests/results/test_column_results.py
@@ -7,6 +7,7 @@ def test_parse_groupdict():
 	)
 	parsed = deepdog.results._parse_bayesrun_column(example_column_name)
 	assert parsed is not None
 	expected = deepdog.results.BayesrunColumnParsed(
 		{
 			"xmin": "-20",
@@ -23,6 +24,30 @@ def test_parse_groupdict():
 	assert parsed == expected
 def test_parse_groupdict_with_magnitude():
 	example_column_name = (
 		"geom_-20_20_-10_10_0_5-magnitude_3.5-orientation_free-dipole_count_100_success"
 	)
 	parsed = deepdog.results._parse_bayesrun_column(example_column_name)
 	assert parsed is not None
 	expected = deepdog.results.BayesrunColumnParsed(
 		{
 			"xmin": "-20",
 			"xmax": "20",
 			"ymin": "-10",
 			"ymax": "10",
 			"zmin": "0",
 			"zmax": "5",
 			"orientation": "free",
 			"avg_filled": "100",
 			"log_magnitude": "3.5",
 			"field_name": "success",
 		}
 	)
 	assert parsed == expected
 # def test_parse_no_match_column_name():
 # 	parsed = deepdog.results.parse_bayesrun_column("There's nothing here")
 # 	assert parsed is None
Author	SHA1	Message	Date
Deepak Mallubhotla	d258cfbec7	chore(release): 1.2.1 (CI: all checks were successful; gitea-physics/deepdog/pipeline/head: this commit looks good; gitea-physics/deepdog/pipeline/tag: this commit looks good)	2024-05-11 20:51:05 -05:00
Deepak Mallubhotla	b3bf4cde97	perf: precompile the magic regexes for probs parsing	2024-05-11 20:49:45 -05:00
Deepak Mallubhotla	60f29b0b2f	perf: avoid recalculating product dict in indexifier to improve performance for probs	2024-05-11 20:49:26 -05:00
Deepak Mallubhotla	093a3fb5c4	chore(release): 1.2.0 (CI: all checks were successful; gitea-physics/deepdog/pipeline/head: this commit looks good; gitea-physics/deepdog/pipeline/tag: this commit looks good)	2024-05-08 22:24:28 -05:00
Deepak Mallubhotla	dc1d2d45a3	feat: adds additional matching regexes (CI: all checks were successful; gitea-physics/deepdog/pipeline/head: this commit looks good)	2024-05-08 22:23:57 -05:00
Deepak Mallubhotla	f0e2fa3da9	feat: adds magnitude enabled parsing option	2024-05-03 10:44:06 -05:00