chore(deps): update dependency numpy to v2

chore(release): 1.7.0
feat: adds configurable skip if file exists
2025-03-17 01:31:05 +00:00 · 2025-02-26 21:57:13 -06:00 · 2025-02-26 21:55:12 -06:00 · 2025-02-26 21:08:00 -06:00 · 2025-02-26 21:01:19 -06:00 · 2025-02-24 08:34:11 -06:00
8 changed files with 156 additions and 20 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,21 @@

 All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.

+## [1.7.0](https://gitea.deepak.science:2222/physics/deepdog/compare/1.6.0...1.7.0) (2025-02-27)
+
+
+### Features
+
+* adds configurable skip if file exists ([24c6e31](https://gitea.deepak.science:2222/physics/deepdog/commit/24c6e311c1d3067eb98cc60e6ca38d76373bf08e))
+
+## [1.6.0](https://gitea.deepak.science:2222/physics/deepdog/compare/1.5.0...1.6.0) (2025-02-27)
+
+
+### Features
+
+* Adds ability to parse bayesruns without timestamps ([46f6b6c](https://gitea.deepak.science:2222/physics/deepdog/commit/46f6b6cdf15c67aedf0c871d201b8db320bccbdf))
+* allows negative log magnitude strings in models ([c8435b4](https://gitea.deepak.science:2222/physics/deepdog/commit/c8435b4b2a6e4b89030f53b5734eb743e2003fb7))
+
 ## [1.5.0](https://gitea.deepak.science:2222/physics/deepdog/compare/1.4.0...1.5.0) (2024-12-30)


--- a/deepdog/direct_monte_carlo/direct_mc.py
+++ b/deepdog/direct_monte_carlo/direct_mc.py
@@ -1,3 +1,5 @@
+import re
+import pathlib
 import csv
 import pdme.model
 import pdme.measurement
@@ -36,9 +38,35 @@ class DirectMonteCarloConfig:
 	tag: str = ""
 	cap_core_count: int = 0  # 0 means cap at num cores - 1
 	chunk_size: int = 50
+	# chunk size of some kind
 	write_bayesrun_file: bool = True
 	bayesrun_file_timestamp: bool = True
-	# chunk size of some kind
+	skip_if_exists: bool = False
+
+	def get_filename(self) -> str:
+		"""
+		Generate a filename for the output of this run.
+		"""
+		# set starting execution timestamp
+		timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
+
+		if self.bayesrun_file_timestamp:
+			timestamp_str = f"{timestamp}-"
+		else:
+			timestamp_str = ""
+		filename = f"{timestamp_str}{self.tag}.realdata.fast_filter.bayesrun.csv"
+		_logger.debug(f"Got filename {filename}")
+		return filename
+
+	def get_filename_regex(self) -> str:
+		"""
+		Generate a regex for the output of this run.
+		"""
+
+		# having both timestamp and the hyphen separately optional is a bit of a hack
+		# too loose, but will never matter
+		pattern = rf"(?P<timestamp>\d{{8}}-\d{{6}})?-?{self.tag}\.realdata\.fast_filter\.bayesrun\.csv"
+		return pattern


 # Aliasing dict as a generic data container
@@ -230,8 +258,27 @@ class DirectMonteCarloRun:

 	def execute(self) -> Sequence[DirectMonteCarloResult]:

-		# set starting execution timestamp
-		timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
+		filename = self.config.get_filename()
+		if self.config.skip_if_exists:
+			_logger.info(f"Checking if {filename} exists")
+			cwd = pathlib.Path.cwd()
+			if (cwd / filename).exists():
+				_logger.info(f"File {filename} exists, skipping")
+				return []
+			if self.config.bayesrun_file_timestamp:
+				_logger.info(
+					"Also need to check file endings because of possible past or current timestamps, check only occurs if writing timestamp is set"
+				)
+				pattern = self.config.get_filename_regex()
+				for file in cwd.iterdir():
+					match = re.match(pattern, file.name)
+					if match is not None:
+						_logger.info(f"Matched {file.name} to {pattern}")
+						_logger.info(f"File {filename} exists, skipping")
+						return []
+				_logger.info(
+					f"Finished checking against pattern {pattern}, hopefully didn't take too long!"
+				)

 		count_per_step = (
 			self.config.monte_carlo_count_per_cycle * self.config.monte_carlo_cycles
@@ -349,14 +396,6 @@ class DirectMonteCarloRun:

 		if self.config.write_bayesrun_file:

-			if self.config.bayesrun_file_timestamp:
-				timestamp_str = f"{timestamp}-"
-			else:
-				timestamp_str = ""
-			filename = (
-				f"{timestamp_str}{self.config.tag}.realdata.fast_filter.bayesrun.csv"
-			)
-
 			_logger.info(f"Going to write to file [{filename}]")
 			# row: Dict[str, Union[int, float, str]] = {}
 			row = {}
--- a/deepdog/results/__init__.py
+++ b/deepdog/results/__init__.py
@@ -19,6 +19,11 @@ FILENAME_REGEX = re.compile(
 	r"(?P<timestamp>\d{8}-\d{6})-(?P<filename_slug>.*)\.realdata\.fast_filter\.bayesrun\.csv"
 )

+# probably a better way but who cares
+NO_TIMESTAMP_FILENAME_REGEX = re.compile(
+	r"(?P<filename_slug>.*)\.realdata\.fast_filter\.bayesrun\.csv"
+)
+

 SUBSET_SIM_FILENAME_REGEX = re.compile(
 	r"(?P<filename_slug>.*)-(?:no_adaptive_steps_)?(?P<num_ss_runs>\d+)-nc_(?P<n_c>\d+)-ns_(?P<n_s>\d+)-mmax_(?P<mmax>\d+)\.multi\.subsetsim\.csv"
@@ -46,15 +51,23 @@ class GeneralOutput:
 	results: typing.Sequence[GeneralModelResult]


+def _parse_string_output_filename(
+	filename: str,
+) -> typing.Tuple[typing.Optional[str], str]:
+	if match := FILENAME_REGEX.match(filename):
+		groups = match.groupdict()
+		return (groups["timestamp"], groups["filename_slug"])
+	elif match := NO_TIMESTAMP_FILENAME_REGEX.match(filename):
+		groups = match.groupdict()
+		return (None, groups["filename_slug"])
+	else:
+		raise ValueError(f"Could not parse {filename} as a bayesrun output filename")
+
+
 def _parse_output_filename(file: pathlib.Path) -> BayesrunOutputFilename:
 	filename = file.name
-	match = FILENAME_REGEX.match(filename)
-	if not match:
-		raise ValueError(f"{filename} was not a valid bayesrun output")
-	groups = match.groupdict()
-	return BayesrunOutputFilename(
-		timestamp=groups["timestamp"], filename_slug=groups["filename_slug"], path=file
-	)
+	timestamp, slug = _parse_string_output_filename(filename)
+	return BayesrunOutputFilename(timestamp=timestamp, filename_slug=slug, path=file)


 def _parse_ss_output_filename(file: pathlib.Path) -> BayesrunOutputFilename:
--- a/deepdog/results/read_csv.py
+++ b/deepdog/results/read_csv.py
@@ -8,6 +8,8 @@ MODEL_REGEXES = [
 		r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
 		r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
 		r"geom_(?P<xmin>-?\d*\.?\d+)_(?P<xmax>-?\d*\.?\d+)_(?P<ymin>-?\d*\.?\d+)_(?P<ymax>-?\d*\.?\d+)_(?P<zmin>-?\d*\.?\d+)_(?P<zmax>-?\d*\.?\d+)-magnitude_(?P<log_magnitude>\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
+		r"geom_(?P<xmin>-?\d+)_(?P<xmax>-?\d+)_(?P<ymin>-?\d+)_(?P<ymax>-?\d+)_(?P<zmin>-?\d+)_(?P<zmax>-?\d+)-magnitude_(?P<log_magnitude>-?\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
+		r"geom_(?P<xmin>-?\d*\.?\d+)_(?P<xmax>-?\d*\.?\d+)_(?P<ymin>-?\d*\.?\d+)_(?P<ymax>-?\d*\.?\d+)_(?P<zmin>-?\d*\.?\d+)_(?P<zmax>-?\d*\.?\d+)-magnitude_(?P<log_magnitude>-?\d*\.?\d+)-orientation_(?P<orientation>free|fixedxy|fixedz)-dipole_count_(?P<avg_filled>\d+)_(?P<field_name>\w*)",
 	]
 ]

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,13 +1,13 @@
 [tool.poetry]
 name = "deepdog"
-version = "1.5.0"
+version = "1.7.0"
 description = ""
 authors = ["Deepak Mallubhotla <dmallubhotla+github@gmail.com>"]

 [tool.poetry.dependencies]
 python = ">=3.8.1,<3.10"
 pdme = "^1.5.0"
-numpy = "2.2.3"
+numpy = "2.2.4"
 scipy = "1.10"
 tqdm = "^4.66.2"

--- a/tests/direct_monte_carlo/test_config_filename.py
+++ b/tests/direct_monte_carlo/test_config_filename.py
@@ -0,0 +1,26 @@
+import re
+import deepdog.direct_monte_carlo
+
+
+def test_config_check_self():
+	config = deepdog.direct_monte_carlo.DirectMonteCarloConfig(
+		tag="test_tag",
+		bayesrun_file_timestamp=False,
+	)
+	expected_filename = "test_tag.realdata.fast_filter.bayesrun.csv"
+	actual_filename = config.get_filename()
+	assert actual_filename == expected_filename
+	regex = config.get_filename_regex()
+	assert re.match(regex, actual_filename) is not None
+
+
+def test_config_check_self_with_timestamp():
+	config = deepdog.direct_monte_carlo.DirectMonteCarloConfig(
+		tag="test_tag",
+		bayesrun_file_timestamp=True,
+	)
+	expected_filename_ending = "test_tag.realdata.fast_filter.bayesrun.csv"
+	actual_filename = config.get_filename()
+	assert actual_filename.endswith(expected_filename_ending)
+	regex = config.get_filename_regex()
+	assert re.match(regex, actual_filename) is not None
--- a/tests/results/test_column_results.py
+++ b/tests/results/test_column_results.py
@@ -48,6 +48,28 @@ def test_parse_groupdict_with_magnitude():
 	assert parsed == expected


+def test_parse_groupdict_with_negative_magnitude():
+	example_column_name = "geom_-20_20_-10_10_0_5-magnitude_-3.5-orientation_free-dipole_count_100_success"
+
+	parsed = deepdog.results.read_csv._parse_bayesrun_column(example_column_name)
+	assert parsed is not None
+	expected = deepdog.results.read_csv.BayesrunColumnParsed(
+		{
+			"xmin": "-20",
+			"xmax": "20",
+			"ymin": "-10",
+			"ymax": "10",
+			"zmin": "0",
+			"zmax": "5",
+			"orientation": "free",
+			"avg_filled": "100",
+			"log_magnitude": "-3.5",
+			"field_name": "success",
+		}
+	)
+	assert parsed == expected
+
+
 # def test_parse_no_match_column_name():
 # 	parsed = deepdog.results.parse_bayesrun_column("There's nothing here")
 # 	assert parsed is None
--- a/tests/results/test_parse_filename.py
+++ b/tests/results/test_parse_filename.py
@@ -0,0 +1,19 @@
+import deepdog.results
+import pytest
+
+
+def test_parse_bayesrun_filename():
+	valid1 = "20250226-204120-dot1-dot1-2-0.realdata.fast_filter.bayesrun.csv"
+
+	timestamp, slug = deepdog.results._parse_string_output_filename(valid1)
+	assert timestamp == "20250226-204120"
+	assert slug == "dot1-dot1-2-0"
+
+	valid2 = "dot1-dot1-2-0.realdata.fast_filter.bayesrun.csv"
+
+	timestamp, slug = deepdog.results._parse_string_output_filename(valid2)
+	assert timestamp is None
+	assert slug == "dot1-dot1-2-0"
+
+	with pytest.raises(ValueError):
+		deepdog.results._parse_string_output_filename("not_a_valid_filename")
Author	SHA1	Message	Date
Renovate Bot	f33b5c7292	chore(deps): update dependency numpy to v2 Some checks failed renovate/artifacts Artifact file update failure gitea-physics/deepdog/pipeline/pr-master There was a failure building this commit Details	2025-03-17 01:31:05 +00:00
Deepak Mallubhotla	71dc906a96	chore(release): 1.7.0 All checks were successful gitea-physics/deepdog/pipeline/head This commit looks good Details gitea-physics/deepdog/pipeline/tag This commit looks good Details	2025-02-26 21:57:13 -06:00
Deepak Mallubhotla	24c6e311c1	feat: adds configurable skip if file exists All checks were successful gitea-physics/deepdog/pipeline/head This commit looks good Details	2025-02-26 21:55:12 -06:00
Deepak Mallubhotla	4dd3004a7b	chore(release): 1.6.0 All checks were successful gitea-physics/deepdog/pipeline/head This commit looks good Details gitea-physics/deepdog/pipeline/tag This commit looks good Details	2025-02-26 21:08:00 -06:00
Deepak Mallubhotla	46f6b6cdf1	feat: Adds ability to parse bayesruns without timestamps All checks were successful gitea-physics/deepdog/pipeline/head This commit looks good Details	2025-02-26 21:01:19 -06:00
Deepak Mallubhotla	c8435b4b2a	feat: allows negative log magnitude strings in models All checks were successful gitea-physics/deepdog/pipeline/head This commit looks good Details	2025-02-24 08:34:11 -06:00