chore(release): 1.2.1

perf: precompile the magic regexes for probs parsing
perf: avoid recalculating product dict in indexifier to improve performance for probs
2024-05-11 20:51:05 -05:00 · 2024-05-11 20:49:45 -05:00 · 2024-05-11 20:49:26 -05:00
4 changed files with 25 additions and 13 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,8 @@

 All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.

+### [1.2.1](https://gitea.deepak.science:2222/physics/deepdog/compare/1.2.0...1.2.1) (2024-05-12)
+
 ## [1.2.0](https://gitea.deepak.science:2222/physics/deepdog/compare/1.1.0...1.2.0) (2024-05-09)


--- a/deepdog/indexify/__init__.py
+++ b/deepdog/indexify/__init__.py
@@ -31,10 +31,10 @@ class Indexifier:

 	def __init__(self, list_dict: typing.Dict[str, typing.Sequence]):
 		self.dict = list_dict
+		self.product_dict = _dict_product(self.dict)

 	def indexify(self, n: int) -> typing.Dict[str, typing.Any]:
-		product_dict = _dict_product(self.dict)
-		return product_dict[n]
+		return self.product_dict[n]

 	def _indexify_indices(self, n: int) -> typing.Sequence[int]:
 		"""
--- a/deepdog/results/__init__.py
+++ b/deepdog/results/__init__.py
@@ -8,20 +8,30 @@ import csv

 _logger = logging.getLogger(__name__)

-FILENAME_REGEX = r"(?P<timestamp>\d{8}-\d{6})-(?P<filename_slug>.*)\.realdata\.fast_filter\.bayesrun\.csv"
+FILENAME_REGEX = re.compile(
+	r"(?P<timestamp>\d{8}-\d{6})-(?P<filename_slug>.*)\.realdata\.fast_filter\.bayesrun\.csv"
+)

 MODEL_REGEXES = [
-	r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
-	r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
-	r"geom_(?P<xmin>-?\d*\.?\d+)_(?P<xmax>-?\d*\.?\d+)_(?P<ymin>-?\d*\.?\d+)_(?P<ymax>-?\d*\.?\d+)_(?P<zmin>-?\d*\.?\d+)_(?P<zmax>-?\d*\.?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)"
+	re.compile(pattern)
+	for pattern in [
+		r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
+		r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
+		r"geom_(?P<xmin>-?\d*\.?\d+)_(?P<xmax>-?\d*\.?\d+)_(?P<ymin>-?\d*\.?\d+)_(?P<ymax>-?\d*\.?\d+)_(?P<zmin>-?\d*\.?\d+)_(?P<zmax>-?\d*\.?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
+	]
 ]

 FILE_SLUG_REGEXES = [
-	r"mock_tarucha-(?P<job_index>\d+)",
-	r"(?:(?P<mock>mock)_)?tarucha(?:_(?P<tarucha_run_id>\d+))?-(?P<job_index>\d+)",
-	r"(?P<tag>\w+)-(?P<job_index>\d+)",
+	re.compile(pattern)
+	for pattern in [  # tried in order, first match wins: the generic tag pattern must stay last or it shadows the tarucha ones
+		r"mock_tarucha-(?P<job_index>\d+)",
+		r"(?:(?P<mock>mock)_)?tarucha(?:_(?P<tarucha_run_id>\d+))?-(?P<job_index>\d+)",
+		r"(?P<tag>\w+)-(?P<job_index>\d+)",
+	]
 ]

+SIMPLE_TAG_REGEX = re.compile(r"\w+-\d+")
+

@dataclasses.dataclass
 class BayesrunOutputFilename:
@@ -86,7 +96,7 @@ def _parse_bayesrun_column(
 	Returns the groupdict for the first match, or None if no match found.
 	"""
 	for pattern in MODEL_REGEXES:
-		match = re.match(pattern, column)
+		match = pattern.match(column)
 		if match:
 			return BayesrunColumnParsed(match.groupdict())
 	else:
@@ -125,7 +135,7 @@ def _parse_bayesrun_row(

 def _parse_output_filename(file: pathlib.Path) -> BayesrunOutputFilename:
 	filename = file.name
-	match = re.match(FILENAME_REGEX, filename)
+	match = FILENAME_REGEX.match(filename)
 	if not match:
 		raise ValueError(f"{filename} was not a valid bayesrun output")
 	groups = match.groupdict()
@@ -136,7 +146,7 @@ def _parse_output_filename(file: pathlib.Path) -> BayesrunOutputFilename:

 def _parse_file_slug(slug: str) -> typing.Optional[typing.Dict[str, str]]:
 	for pattern in FILE_SLUG_REGEXES:
-		match = re.match(pattern, slug)
+		match = pattern.match(slug)
 		if match:
 			return match.groupdict()
 	else:
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "deepdog"
-version = "1.2.0"
+version = "1.2.1"
 description = ""
 authors = ["Deepak Mallubhotla <dmallubhotla+github@gmail.com>"]
Author	SHA1	Message	Date
Deepak Mallubhotla	d258cfbec7	chore(release): 1.2.1 All checks were successful gitea-physics/deepdog/pipeline/head This commit looks good Details gitea-physics/deepdog/pipeline/tag This commit looks good Details	2024-05-11 20:51:05 -05:00
Deepak Mallubhotla	b3bf4cde97	perf: precompile the magic regexes for probs parsing	2024-05-11 20:49:45 -05:00
Deepak Mallubhotla	60f29b0b2f	perf: avoid recalculating product dict in indexifier to improve performance for probs	2024-05-11 20:49:26 -05:00