From 9a7a3ff2c7ebe81d5e10647ce39844c372ff7b07 Mon Sep 17 00:00:00 2001 From: Deepak Mallubhotla Date: Thu, 27 Jul 2023 17:39:02 -0500 Subject: [PATCH] feat: adds configurable chunk size for the initial mc level 0 SS stage cost calculation to reduce memory usage --- deepdog/bayes_run_with_ss.py | 4 +++- deepdog/subset_simulation/subset_simulation_impl.py | 12 +++++++++++- tests/test_bayes_run_with_ss.py | 2 ++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/deepdog/bayes_run_with_ss.py b/deepdog/bayes_run_with_ss.py index 8ef1322..6e41ffe 100644 --- a/deepdog/bayes_run_with_ss.py +++ b/deepdog/bayes_run_with_ss.py @@ -71,6 +71,7 @@ class BayesRunWithSubspaceSimulation: ss_default_w_log_step=0.01, ss_default_upper_w_log_step=4, ss_dump_last_generation=False, + ss_initial_costs_chunk_size=100, write_output_to_bayesruncsv=True, ) -> None: self.dot_inputs = pdme.inputs.inputs_with_frequency_range( @@ -136,7 +137,7 @@ class BayesRunWithSubspaceSimulation: self.ss_default_w_log_step = ss_default_w_log_step self.ss_default_upper_w_log_step = ss_default_upper_w_log_step self.ss_dump_last_generation = ss_dump_last_generation - + self.ss_initial_costs_chunk_size = ss_initial_costs_chunk_size self.run_count = run_count self.write_output_to_csv = write_output_to_bayesruncsv @@ -183,6 +184,7 @@ class BayesRunWithSubspaceSimulation: self.ss_default_r_step, self.ss_default_w_log_step, self.ss_default_upper_w_log_step, + initial_cost_chunk_size=self.ss_initial_costs_chunk_size, keep_probs_list=False, dump_last_generation_to_file=self.ss_dump_last_generation, ) diff --git a/deepdog/subset_simulation/subset_simulation_impl.py b/deepdog/subset_simulation/subset_simulation_impl.py index e2dab92..986cbf0 100644 --- a/deepdog/subset_simulation/subset_simulation_impl.py +++ b/deepdog/subset_simulation/subset_simulation_impl.py @@ -40,6 +40,7 @@ class SubsetSimulation: default_upper_w_log_step=4, keep_probs_list=True, dump_last_generation_to_file=False, + initial_cost_chunk_size=100, ): name, model = model_name_pair self.model_name = name @@ -85,6 +86,8 @@ class SubsetSimulation: self.keep_probs_list = keep_probs_list self.dump_last_generations = dump_last_generation_to_file + self.initial_cost_chunk_size = initial_cost_chunk_size + def execute(self) -> SubsetSimulationResult: probs_list = [] @@ -96,7 +99,14 @@ class SubsetSimulation: ) # _logger.debug(sample_dipoles) # _logger.debug(sample_dipoles.shape) - costs = self.cost_function_to_use(sample_dipoles) + + raw_costs = [] + _logger.debug(f"Using iterated cost function thing with chunk size {self.initial_cost_chunk_size}") + + for x in range(0, len(sample_dipoles), self.initial_cost_chunk_size): + _logger.debug(f"doing chunk {x}") + raw_costs.extend(self.cost_function_to_use(sample_dipoles[x: x + self.initial_cost_chunk_size])) + costs = numpy.array(raw_costs) _logger.debug(f"costs: {costs}") sorted_indexes = costs.argsort()[::-1] diff --git a/tests/test_bayes_run_with_ss.py b/tests/test_bayes_run_with_ss.py index ca2bcda..f196fdd 100644 --- a/tests/test_bayes_run_with_ss.py +++ b/tests/test_bayes_run_with_ss.py @@ -108,6 +108,7 @@ def test_basic_analysis(snapshot): ss_default_upper_w_log_step=4, ss_dump_last_generation=False, write_output_to_bayesruncsv=False, + ss_initial_costs_chunk_size=1000, ) result = square_run.go() @@ -150,6 +151,7 @@ def test_bayesss_with_tighter_cost(snapshot): ss_default_upper_w_log_step=4, ss_dump_last_generation=False, write_output_to_bayesruncsv=False, + ss_initial_costs_chunk_size=1 ) result = square_run.go()