feat: better management of cli wrapper

This commit is contained in:
2024-04-27 23:04:33 -05:00
parent 4b2e573715
commit b0ad4bead0
6 changed files with 110 additions and 46 deletions

View File

@@ -1,5 +1,5 @@
from deepdog.cli.probs.main import main from deepdog.cli.probs.main import wrapped_main
__all__ = [ __all__ = [
"main", "wrapped_main",
] ]

View File

@@ -27,8 +27,8 @@ def parse_args() -> argparse.Namespace:
) )
parser.add_argument( parser.add_argument(
"--indexify-json", "--indexify-json",
help="A json file with the indexify config for parsing job indexes", help="A json file with the indexify config for parsing job indexes. Will skip if not present",
default="indexes.json", default="",
) )
parser.add_argument( parser.add_argument(
"--seed-index", "--seed-index",

View File

@@ -2,6 +2,7 @@ import typing
from deepdog.results import BayesrunOutput from deepdog.results import BayesrunOutput
import logging import logging
import csv import csv
import tqdm
_logger = logging.getLogger(__name__) _logger = logging.getLogger(__name__)
@@ -21,7 +22,8 @@ def build_model_dict(
typing.Tuple, typing.Dict[typing.Tuple, typing.Dict["str", typing.Any]] typing.Tuple, typing.Dict[typing.Tuple, typing.Dict["str", typing.Any]]
] = {} ] = {}
for out in bayes_outputs: _logger.info("building model dict")
for out in tqdm.tqdm(bayes_outputs, desc="reading outputs", leave=False):
for model_result in out.results: for model_result in out.results:
model_key = tuple(v for v in model_result.parsed_model_keys.values()) model_key = tuple(v for v in model_result.parsed_model_keys.values())
if model_key not in model_dict: if model_key not in model_dict:
@@ -88,10 +90,21 @@ def write_uncoalesced_dict(
def coalesced_dict( def coalesced_dict(
uncoalesced_model_dict: typing.Dict[ uncoalesced_model_dict: typing.Dict[
typing.Tuple, typing.Dict[typing.Tuple, typing.Dict["str", typing.Any]] typing.Tuple, typing.Dict[typing.Tuple, typing.Dict["str", typing.Any]]
] ],
minimum_count: float = 0.1,
): ):
"""
pass in uncoalesced dict
the minimum_count field is what we use to make sure our probs are never zero
"""
coalesced_dict = {} coalesced_dict = {}
# we are already iterating so for no reason because performance really doesn't matter let's count the keys ourselves
num_keys = 0
# first pass coalesce
for model_key, model_dict in uncoalesced_model_dict.items(): for model_key, model_dict in uncoalesced_model_dict.items():
num_keys += 1
for calculation in model_dict.values(): for calculation in model_dict.values():
if model_key not in coalesced_dict: if model_key not in coalesced_dict:
coalesced_dict[model_key] = { coalesced_dict[model_key] = {
@@ -104,6 +117,33 @@ def coalesced_dict(
sub_dict["calculations_coalesced"] += 1 sub_dict["calculations_coalesced"] += 1
sub_dict["count"] += calculation["count"] sub_dict["count"] += calculation["count"]
sub_dict["success"] += calculation["success"] sub_dict["success"] += calculation["success"]
# second pass do probability calculation
prior = 1 / num_keys
_logger.info(f"Got {num_keys} model keys, so our prior will be {prior}")
total_weight = 0
for coalesced_model_dict in coalesced_dict.values():
model_weight = (
max(minimum_count, coalesced_model_dict["success"])
/ coalesced_model_dict["count"]
) * prior
total_weight += model_weight
total_prob = 0
for coalesced_model_dict in coalesced_dict.values():
model_weight = (
max(minimum_count, coalesced_model_dict["success"])
/ coalesced_model_dict["count"]
)
prob = model_weight * prior / total_weight
coalesced_model_dict["prob"] = prob
total_prob += prob
_logger.debug(
f"Got a total probability of {total_prob}, which should be close to 1 up to float/rounding error"
)
return coalesced_dict return coalesced_dict
@@ -120,7 +160,7 @@ def write_coalesced_dict(
_logger.info(f"Detected model field names {model_field_names}") _logger.info(f"Detected model field names {model_field_names}")
collected_fieldnames = list(model_field_names) collected_fieldnames = list(model_field_names)
collected_fieldnames.extend(["calculations_coalesced", "success", "count"]) collected_fieldnames.extend(["calculations_coalesced", "success", "count", "prob"])
with open(coalesced_output_filename, "w", newline="") as coalesced_output_file: with open(coalesced_output_filename, "w", newline="") as coalesced_output_file:
writer = csv.DictWriter(coalesced_output_file, fieldnames=collected_fieldnames) writer = csv.DictWriter(coalesced_output_file, fieldnames=collected_fieldnames)
writer.writeheader() writer.writeheader()
@@ -132,6 +172,7 @@ def write_coalesced_dict(
"calculations_coalesced": model_dict["calculations_coalesced"], "calculations_coalesced": model_dict["calculations_coalesced"],
"success": model_dict["success"], "success": model_dict["success"],
"count": model_dict["count"], "count": model_dict["count"],
"prob": model_dict["prob"],
} }
) )
writer.writerow(row) writer.writerow(row)

View File

@@ -6,6 +6,9 @@ import deepdog.cli.probs.dicts
import deepdog.results import deepdog.results
import deepdog.indexify import deepdog.indexify
import pathlib import pathlib
import tqdm
import tqdm.contrib.logging
_logger = logging.getLogger(__name__) _logger = logging.getLogger(__name__)
@@ -28,53 +31,65 @@ def set_up_logging(log_file: str):
logging.captureWarnings(True) logging.captureWarnings(True)
def wrapped_main(args: argparse.Namespace): def main(args: argparse.Namespace):
""" """
Main function with passed in arguments and no additional logging setup in case we want to extract out later Main function with passed in arguments and no additional logging setup in case we want to extract out later
""" """
_logger.info(f"args: {args}")
if args.coalesced_keys: with tqdm.contrib.logging.logging_redirect_tqdm():
raise NotImplementedError( _logger.info(f"args: {args}")
"Currently not supporting coalesced keys, but maybe in future"
try:
if args.coalesced_keys:
raise NotImplementedError(
"Currently not supporting coalesced keys, but maybe in future"
)
except AttributeError:
# we don't care if this is missing because we don't actually want it to be there
pass
indexifier = None
if args.indexify_json:
with open(args.indexify_json, "r") as indexify_json_file:
indexify_data = json.load(indexify_json_file)
if args.seed_index > 0:
indexify_data[args.seed_fieldname] = list(range(args.seed_index))
# _logger.debug(f"Indexifier data looks like {indexify_data}")
indexifier = deepdog.indexify.Indexifier(indexify_data)
bayes_dir = pathlib.Path(args.bayesrun_directory)
out_files = [f for f in bayes_dir.iterdir() if f.name.endswith("bayesrun.csv")]
_logger.info(
f"Reading {len(out_files)} bayesrun.csv files in directory {args.bayesrun_directory}"
) )
with open(args.indexify_json, "r") as indexify_json_file: # _logger.info(out_files)
indexify_data = json.load(indexify_json_file) parsed_output_files = [
if args.seed_index > 0: deepdog.results.read_output_file(f, indexifier)
indexify_data[args.seed_fieldname] = list(range(args.seed_index)) for f in tqdm.tqdm(out_files, desc="reading files", leave=False)
# _logger.debug(f"Indexifier data looks like {indexify_data}") ]
indexifier = deepdog.indexify.Indexifier(indexify_data)
bayes_dir = pathlib.Path(args.bayesrun_directory) _logger.info("building uncoalesced dict")
out_files = [f for f in bayes_dir.iterdir() if f.name.endswith("bayesrun.csv")] uncoalesced_dict = deepdog.cli.probs.dicts.build_model_dict(parsed_output_files)
_logger.info(
f"Found {len(out_files)} bayesrun.csv files in directory {args.bayesrun_directory}"
)
# _logger.info(out_files)
parsed_output_files = [
deepdog.results.read_output_file(f, indexifier) for f in out_files
]
_logger.info("building uncoalesced dict") if "uncoalesced_outfile" in args and args.uncoalesced_outfile:
uncoalesced_dict = deepdog.cli.probs.dicts.build_model_dict(parsed_output_files) deepdog.cli.probs.dicts.write_uncoalesced_dict(
args.uncoalesced_outfile, uncoalesced_dict
)
else:
_logger.info("Skipping writing uncoalesced")
if args.uncoalesced_outfile: _logger.info("building coalesced dict")
deepdog.cli.probs.dicts.write_uncoalesced_dict( coalesced = deepdog.cli.probs.dicts.coalesced_dict(uncoalesced_dict)
args.uncoalesced_outfile, uncoalesced_dict
)
else:
_logger.info("Skipping writing uncoalesced")
_logger.info("building coalesced dict") if "coalesced_outfile" in args and args.coalesced_outfile:
coalesced = deepdog.cli.probs.dicts.coalesced_dict(uncoalesced_dict) deepdog.cli.probs.dicts.write_coalesced_dict(
args.coalesced_outfile, coalesced
if args.coalesced_outfile: )
deepdog.cli.probs.dicts.write_coalesced_dict(args.coalesced_outfile, coalesced) else:
else: _logger.info("Skipping writing coalesced")
_logger.info("Skipping writing coalesced")
def main(): def wrapped_main():
args = deepdog.cli.probs.args.parse_args() args = deepdog.cli.probs.args.parse_args()
set_up_logging(args.log_file) set_up_logging(args.log_file)
wrapped_main(args) main(args)

2
poetry.lock generated
View File

@@ -1220,4 +1220,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = ">=3.8.1,<3.10" python-versions = ">=3.8.1,<3.10"
content-hash = "b7f33da5b5a2af6bcb2a4c95cf391d04a76047d4f7e5c105b7cc38c73563fa51" content-hash = "828610d9447294e707a6df2affb6ee7947e2be3b567371217265a8b94a9768f6"

View File

@@ -9,6 +9,7 @@ python = ">=3.8.1,<3.10"
pdme = "^0.9.3" pdme = "^0.9.3"
numpy = "1.22.3" numpy = "1.22.3"
scipy = "1.10" scipy = "1.10"
tqdm = "^4.66.2"
[tool.poetry.dev-dependencies] [tool.poetry.dev-dependencies]
pytest = ">=6" pytest = ">=6"
@@ -20,7 +21,7 @@ black = "^22.3.0"
syrupy = "^4.0.8" syrupy = "^4.0.8"
[tool.poetry.scripts] [tool.poetry.scripts]
probs = "deepdog.cli.probs:main" probs = "deepdog.cli.probs:wrapped_main"
[build-system] [build-system]
requires = ["poetry-core>=1.0.0"] requires = ["poetry-core>=1.0.0"]
@@ -41,6 +42,13 @@ module = [
] ]
ignore_missing_imports = true ignore_missing_imports = true
[[tool.mypy.overrides]]
module = [
"tqdm",
"tqdm.*"
]
ignore_missing_imports = true
[tool.semantic_release] [tool.semantic_release]
version_toml = "pyproject.toml:tool.poetry.version" version_toml = "pyproject.toml:tool.poetry.version"
tag_format = "{version}" tag_format = "{version}"