feat: better management of cli wrapper
This commit is contained in:
@@ -1,5 +1,5 @@
|
|||||||
from deepdog.cli.probs.main import main
|
from deepdog.cli.probs.main import wrapped_main
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"main",
|
"wrapped_main",
|
||||||
]
|
]
|
||||||
|
@@ -27,8 +27,8 @@ def parse_args() -> argparse.Namespace:
|
|||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--indexify-json",
|
"--indexify-json",
|
||||||
help="A json file with the indexify config for parsing job indexes",
|
help="A json file with the indexify config for parsing job indexes. Will skip if not present",
|
||||||
default="indexes.json",
|
default="",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--seed-index",
|
"--seed-index",
|
||||||
|
@@ -2,6 +2,7 @@ import typing
|
|||||||
from deepdog.results import BayesrunOutput
|
from deepdog.results import BayesrunOutput
|
||||||
import logging
|
import logging
|
||||||
import csv
|
import csv
|
||||||
|
import tqdm
|
||||||
|
|
||||||
_logger = logging.getLogger(__name__)
|
_logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -21,7 +22,8 @@ def build_model_dict(
|
|||||||
typing.Tuple, typing.Dict[typing.Tuple, typing.Dict["str", typing.Any]]
|
typing.Tuple, typing.Dict[typing.Tuple, typing.Dict["str", typing.Any]]
|
||||||
] = {}
|
] = {}
|
||||||
|
|
||||||
for out in bayes_outputs:
|
_logger.info("building model dict")
|
||||||
|
for out in tqdm.tqdm(bayes_outputs, desc="reading outputs", leave=False):
|
||||||
for model_result in out.results:
|
for model_result in out.results:
|
||||||
model_key = tuple(v for v in model_result.parsed_model_keys.values())
|
model_key = tuple(v for v in model_result.parsed_model_keys.values())
|
||||||
if model_key not in model_dict:
|
if model_key not in model_dict:
|
||||||
@@ -88,10 +90,21 @@ def write_uncoalesced_dict(
|
|||||||
def coalesced_dict(
|
def coalesced_dict(
|
||||||
uncoalesced_model_dict: typing.Dict[
|
uncoalesced_model_dict: typing.Dict[
|
||||||
typing.Tuple, typing.Dict[typing.Tuple, typing.Dict["str", typing.Any]]
|
typing.Tuple, typing.Dict[typing.Tuple, typing.Dict["str", typing.Any]]
|
||||||
]
|
],
|
||||||
|
minimum_count: float = 0.1,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
pass in uncoalesced dict
|
||||||
|
minimum_count is a floor applied to each model's success count, so that no model's probability is ever exactly zero
|
||||||
|
"""
|
||||||
coalesced_dict = {}
|
coalesced_dict = {}
|
||||||
|
|
||||||
|
# We are already iterating over the dict, so (for no particular reason, since performance really doesn't matter here) let's count the keys ourselves.
|
||||||
|
num_keys = 0
|
||||||
|
|
||||||
|
# first pass coalesce
|
||||||
for model_key, model_dict in uncoalesced_model_dict.items():
|
for model_key, model_dict in uncoalesced_model_dict.items():
|
||||||
|
num_keys += 1
|
||||||
for calculation in model_dict.values():
|
for calculation in model_dict.values():
|
||||||
if model_key not in coalesced_dict:
|
if model_key not in coalesced_dict:
|
||||||
coalesced_dict[model_key] = {
|
coalesced_dict[model_key] = {
|
||||||
@@ -104,6 +117,33 @@ def coalesced_dict(
|
|||||||
sub_dict["calculations_coalesced"] += 1
|
sub_dict["calculations_coalesced"] += 1
|
||||||
sub_dict["count"] += calculation["count"]
|
sub_dict["count"] += calculation["count"]
|
||||||
sub_dict["success"] += calculation["success"]
|
sub_dict["success"] += calculation["success"]
|
||||||
|
|
||||||
|
# second pass do probability calculation
|
||||||
|
|
||||||
|
prior = 1 / num_keys
|
||||||
|
_logger.info(f"Got {num_keys} model keys, so our prior will be {prior}")
|
||||||
|
|
||||||
|
total_weight = 0
|
||||||
|
for coalesced_model_dict in coalesced_dict.values():
|
||||||
|
model_weight = (
|
||||||
|
max(minimum_count, coalesced_model_dict["success"])
|
||||||
|
/ coalesced_model_dict["count"]
|
||||||
|
) * prior
|
||||||
|
total_weight += model_weight
|
||||||
|
|
||||||
|
total_prob = 0
|
||||||
|
for coalesced_model_dict in coalesced_dict.values():
|
||||||
|
model_weight = (
|
||||||
|
max(minimum_count, coalesced_model_dict["success"])
|
||||||
|
/ coalesced_model_dict["count"]
|
||||||
|
)
|
||||||
|
prob = model_weight * prior / total_weight
|
||||||
|
coalesced_model_dict["prob"] = prob
|
||||||
|
total_prob += prob
|
||||||
|
|
||||||
|
_logger.debug(
|
||||||
|
f"Got a total probability of {total_prob}, which should be close to 1 up to float/rounding error"
|
||||||
|
)
|
||||||
return coalesced_dict
|
return coalesced_dict
|
||||||
|
|
||||||
|
|
||||||
@@ -120,7 +160,7 @@ def write_coalesced_dict(
|
|||||||
_logger.info(f"Detected model field names {model_field_names}")
|
_logger.info(f"Detected model field names {model_field_names}")
|
||||||
|
|
||||||
collected_fieldnames = list(model_field_names)
|
collected_fieldnames = list(model_field_names)
|
||||||
collected_fieldnames.extend(["calculations_coalesced", "success", "count"])
|
collected_fieldnames.extend(["calculations_coalesced", "success", "count", "prob"])
|
||||||
with open(coalesced_output_filename, "w", newline="") as coalesced_output_file:
|
with open(coalesced_output_filename, "w", newline="") as coalesced_output_file:
|
||||||
writer = csv.DictWriter(coalesced_output_file, fieldnames=collected_fieldnames)
|
writer = csv.DictWriter(coalesced_output_file, fieldnames=collected_fieldnames)
|
||||||
writer.writeheader()
|
writer.writeheader()
|
||||||
@@ -132,6 +172,7 @@ def write_coalesced_dict(
|
|||||||
"calculations_coalesced": model_dict["calculations_coalesced"],
|
"calculations_coalesced": model_dict["calculations_coalesced"],
|
||||||
"success": model_dict["success"],
|
"success": model_dict["success"],
|
||||||
"count": model_dict["count"],
|
"count": model_dict["count"],
|
||||||
|
"prob": model_dict["prob"],
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
writer.writerow(row)
|
writer.writerow(row)
|
||||||
|
@@ -6,6 +6,9 @@ import deepdog.cli.probs.dicts
|
|||||||
import deepdog.results
|
import deepdog.results
|
||||||
import deepdog.indexify
|
import deepdog.indexify
|
||||||
import pathlib
|
import pathlib
|
||||||
|
import tqdm
|
||||||
|
import tqdm.contrib.logging
|
||||||
|
|
||||||
|
|
||||||
_logger = logging.getLogger(__name__)
|
_logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -28,53 +31,65 @@ def set_up_logging(log_file: str):
|
|||||||
logging.captureWarnings(True)
|
logging.captureWarnings(True)
|
||||||
|
|
||||||
|
|
||||||
def wrapped_main(args: argparse.Namespace):
|
def main(args: argparse.Namespace):
|
||||||
"""
|
"""
|
||||||
Main function with passed in arguments and no additional logging setup in case we want to extract out later
|
Main function with passed in arguments and no additional logging setup in case we want to extract out later
|
||||||
"""
|
"""
|
||||||
_logger.info(f"args: {args}")
|
|
||||||
|
|
||||||
if args.coalesced_keys:
|
with tqdm.contrib.logging.logging_redirect_tqdm():
|
||||||
raise NotImplementedError(
|
_logger.info(f"args: {args}")
|
||||||
"Currently not supporting coalesced keys, but maybe in future"
|
|
||||||
|
try:
|
||||||
|
if args.coalesced_keys:
|
||||||
|
raise NotImplementedError(
|
||||||
|
"Currently not supporting coalesced keys, but maybe in future"
|
||||||
|
)
|
||||||
|
except AttributeError:
|
||||||
|
# args may not define coalesced_keys at all; we don't care, because we don't actually want that option to be there
|
||||||
|
pass
|
||||||
|
|
||||||
|
indexifier = None
|
||||||
|
if args.indexify_json:
|
||||||
|
with open(args.indexify_json, "r") as indexify_json_file:
|
||||||
|
indexify_data = json.load(indexify_json_file)
|
||||||
|
if args.seed_index > 0:
|
||||||
|
indexify_data[args.seed_fieldname] = list(range(args.seed_index))
|
||||||
|
# _logger.debug(f"Indexifier data looks like {indexify_data}")
|
||||||
|
indexifier = deepdog.indexify.Indexifier(indexify_data)
|
||||||
|
|
||||||
|
bayes_dir = pathlib.Path(args.bayesrun_directory)
|
||||||
|
out_files = [f for f in bayes_dir.iterdir() if f.name.endswith("bayesrun.csv")]
|
||||||
|
_logger.info(
|
||||||
|
f"Reading {len(out_files)} bayesrun.csv files in directory {args.bayesrun_directory}"
|
||||||
)
|
)
|
||||||
with open(args.indexify_json, "r") as indexify_json_file:
|
# _logger.info(out_files)
|
||||||
indexify_data = json.load(indexify_json_file)
|
parsed_output_files = [
|
||||||
if args.seed_index > 0:
|
deepdog.results.read_output_file(f, indexifier)
|
||||||
indexify_data[args.seed_fieldname] = list(range(args.seed_index))
|
for f in tqdm.tqdm(out_files, desc="reading files", leave=False)
|
||||||
# _logger.debug(f"Indexifier data looks like {indexify_data}")
|
]
|
||||||
indexifier = deepdog.indexify.Indexifier(indexify_data)
|
|
||||||
|
|
||||||
bayes_dir = pathlib.Path(args.bayesrun_directory)
|
_logger.info("building uncoalesced dict")
|
||||||
out_files = [f for f in bayes_dir.iterdir() if f.name.endswith("bayesrun.csv")]
|
uncoalesced_dict = deepdog.cli.probs.dicts.build_model_dict(parsed_output_files)
|
||||||
_logger.info(
|
|
||||||
f"Found {len(out_files)} bayesrun.csv files in directory {args.bayesrun_directory}"
|
|
||||||
)
|
|
||||||
# _logger.info(out_files)
|
|
||||||
parsed_output_files = [
|
|
||||||
deepdog.results.read_output_file(f, indexifier) for f in out_files
|
|
||||||
]
|
|
||||||
|
|
||||||
_logger.info("building uncoalesced dict")
|
if "uncoalesced_outfile" in args and args.uncoalesced_outfile:
|
||||||
uncoalesced_dict = deepdog.cli.probs.dicts.build_model_dict(parsed_output_files)
|
deepdog.cli.probs.dicts.write_uncoalesced_dict(
|
||||||
|
args.uncoalesced_outfile, uncoalesced_dict
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
_logger.info("Skipping writing uncoalesced")
|
||||||
|
|
||||||
if args.uncoalesced_outfile:
|
_logger.info("building coalesced dict")
|
||||||
deepdog.cli.probs.dicts.write_uncoalesced_dict(
|
coalesced = deepdog.cli.probs.dicts.coalesced_dict(uncoalesced_dict)
|
||||||
args.uncoalesced_outfile, uncoalesced_dict
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
_logger.info("Skipping writing uncoalesced")
|
|
||||||
|
|
||||||
_logger.info("building coalesced dict")
|
if "coalesced_outfile" in args and args.coalesced_outfile:
|
||||||
coalesced = deepdog.cli.probs.dicts.coalesced_dict(uncoalesced_dict)
|
deepdog.cli.probs.dicts.write_coalesced_dict(
|
||||||
|
args.coalesced_outfile, coalesced
|
||||||
if args.coalesced_outfile:
|
)
|
||||||
deepdog.cli.probs.dicts.write_coalesced_dict(args.coalesced_outfile, coalesced)
|
else:
|
||||||
else:
|
_logger.info("Skipping writing coalesced")
|
||||||
_logger.info("Skipping writing coalesced")
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def wrapped_main():
|
||||||
args = deepdog.cli.probs.args.parse_args()
|
args = deepdog.cli.probs.args.parse_args()
|
||||||
set_up_logging(args.log_file)
|
set_up_logging(args.log_file)
|
||||||
wrapped_main(args)
|
main(args)
|
||||||
|
2
poetry.lock
generated
2
poetry.lock
generated
@@ -1220,4 +1220,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = ">=3.8.1,<3.10"
|
python-versions = ">=3.8.1,<3.10"
|
||||||
content-hash = "b7f33da5b5a2af6bcb2a4c95cf391d04a76047d4f7e5c105b7cc38c73563fa51"
|
content-hash = "828610d9447294e707a6df2affb6ee7947e2be3b567371217265a8b94a9768f6"
|
||||||
|
@@ -9,6 +9,7 @@ python = ">=3.8.1,<3.10"
|
|||||||
pdme = "^0.9.3"
|
pdme = "^0.9.3"
|
||||||
numpy = "1.22.3"
|
numpy = "1.22.3"
|
||||||
scipy = "1.10"
|
scipy = "1.10"
|
||||||
|
tqdm = "^4.66.2"
|
||||||
|
|
||||||
[tool.poetry.dev-dependencies]
|
[tool.poetry.dev-dependencies]
|
||||||
pytest = ">=6"
|
pytest = ">=6"
|
||||||
@@ -20,7 +21,7 @@ black = "^22.3.0"
|
|||||||
syrupy = "^4.0.8"
|
syrupy = "^4.0.8"
|
||||||
|
|
||||||
[tool.poetry.scripts]
|
[tool.poetry.scripts]
|
||||||
probs = "deepdog.cli.probs:main"
|
probs = "deepdog.cli.probs:wrapped_main"
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["poetry-core>=1.0.0"]
|
requires = ["poetry-core>=1.0.0"]
|
||||||
@@ -41,6 +42,13 @@ module = [
|
|||||||
]
|
]
|
||||||
ignore_missing_imports = true
|
ignore_missing_imports = true
|
||||||
|
|
||||||
|
[[tool.mypy.overrides]]
|
||||||
|
module = [
|
||||||
|
"tqdm",
|
||||||
|
"tqdm.*"
|
||||||
|
]
|
||||||
|
ignore_missing_imports = true
|
||||||
|
|
||||||
[tool.semantic_release]
|
[tool.semantic_release]
|
||||||
version_toml = "pyproject.toml:tool.poetry.version"
|
version_toml = "pyproject.toml:tool.poetry.version"
|
||||||
tag_format = "{version}"
|
tag_format = "{version}"
|
||||||
|
Reference in New Issue
Block a user