# Source code for twaml._apps

"""
twaml command line applications
"""

import argparse
from twaml.data import from_root
import twaml.utils
import yaml
import pandas as pd


def _build_root2pytables_parser():
    """Build the argument parser used by :func:`root2pytables`."""
    parser = argparse.ArgumentParser(
        description=(
            "Convert ROOT files to a pytables hdf5 dataset "
            "via twaml.data.root_dataset and "
            "twaml.data.dataset.to_pytables"
        )
    )
    parser.add_argument(
        "-i", "--input-files", type=str, nargs="+", required=True,
        help="input ROOT files",
    )
    parser.add_argument(
        "-n", "--name", type=str, required=True,
        help="dataset name (required when reading back into twaml.data.dataset)",
    )
    parser.add_argument(
        "-o", "--out-file", type=str, required=True,
        help="Output h5 file (existing file will be overwritten)",
    )
    parser.add_argument(
        "-b", "--branches", type=str, nargs="+", required=False,
        help="branches to save (defaults to all)",
    )
    parser.add_argument(
        "--tree-name", type=str, required=False, default="WtLoop_nominal",
        help="tree name",
    )
    parser.add_argument(
        "--weight-name", type=str, required=False, default="weight_nominal",
        help="weight branch name",
    )
    parser.add_argument(
        "--auxweights", type=str, nargs="+", required=False,
        help="extra auxiliary weights to save",
    )
    parser.add_argument(
        "--selection", type=str, required=False,
        help=("A selection string or YAML file containing a map of selections "
              "(see `selection` argument docs in `twaml.data.from_root`)"),
    )
    parser.add_argument(
        "--detect-weights", action="store_true",
        help="detect weights in the dataset, --auxweights overrides this",
    )
    parser.add_argument(
        "--nthreads", type=int, default=1, required=False,
        help="number of threads to use via ThreadPoolExecutor",
    )
    parser.add_argument(
        "--aggro-strip", action="store_true",
        help="call the `aggressively_strip()` function on the dataset before saving",
    )
    parser.add_argument(
        "--table-format", action="store_true",
        help="Use the 'table' format keyword when calling DataFrame's to_hdf function",
    )
    parser.add_argument(
        "--use-lz4", action="store_true",
        help="Use lz4 compression",
    )
    return parser


def root2pytables():
    """command line application which converts a set of ROOT files into a
    pytables HDF5 file via the :meth:`twaml.data.from_root` function
    and the :meth:`twaml.data.dataset.to_pytables` member function of
    the :class:`twaml.data.dataset` class.

    Arguments are read from ``sys.argv``. ``--selection`` is handled in
    one of three ways:

    * ``freq_shortcut``: use the built-in map of region selections
      taken from :mod:`twaml.utils`.
    * a path ending in ``.yml`` or ``.yaml``: load a map of selections
      from that file.
    * anything else (including not given): forward the value as the
      ``selection`` argument of ``from_root``.

    In the first two cases one output file is written per entry of the
    map, named ``<out-file without .h5>_<key>.h5``; otherwise a single
    file is written to ``--out-file``.

    Returns
    -------
    int
        ``0`` on success.

    Raises
    ------
    ValueError
        If ``--out-file`` does not end in ``.h5``, or if the selection
        YAML file does not contain a mapping.
    """
    args = _build_root2pytables_parser().parse_args()

    suffix = ".h5"
    if not args.out_file.endswith(suffix):
        raise ValueError("--out-file argument must end in .h5")

    # keyword arguments forwarded to to_pytables via ``to_hdf_kw``
    to_hdf_kw = {}
    if args.table_format:
        to_hdf_kw["format"] = "table"
    if args.use_lz4:
        to_hdf_kw["complib"] = "blosc:lz4"

    # keyword arguments shared by both from_root calls below
    from_root_kw = dict(
        name=args.name,
        tree_name=args.tree_name,
        weight_name=args.weight_name,
        branches=args.branches,
        auxweights=args.auxweights,
        detect_weights=args.detect_weights,
        # a thread count of 1 (or less) is passed on as None
        nthreads=args.nthreads if args.nthreads > 1 else None,
        wtloop_meta=True,
    )

    # Resolve a map of selections, if one was requested. A plain
    # selection string leaves this as None and falls through to the
    # single-dataset path instead of hitting an unbound variable.
    selection_map = None
    if args.selection == "freq_shortcut":
        selection_map = {
            "r1j1b": twaml.utils.SELECTION_1j1b,
            "r2j1b": twaml.utils.SELECTION_2j1b,
            "r2j2b": twaml.utils.SELECTION_2j2b,
            "r3j1b": twaml.utils.SELECTION_3j1b,
            "r3jHb": twaml.utils.SELECTION_3jHb,
        }
    elif args.selection is not None and args.selection.endswith((".yml", ".yaml")):
        with open(args.selection) as f:
            # NOTE(review): yaml.full_load can construct more than plain
            # data types; only use with trusted selection files (or
            # consider yaml.safe_load).
            selection_map = yaml.full_load(f)
        if not isinstance(selection_map, dict):
            raise ValueError(
                f"selection file {args.selection} must contain a map of selections"
            )

    if selection_map is not None:
        full_ds = from_root(args.input_files, **from_root_kw)
        selected_dses = full_ds.apply_selections(selection_map)
        # strip only the trailing ".h5"; str.split would cut at the first
        # occurrence anywhere in the path
        anchor = args.out_file[: -len(suffix)]
        for sel_name, sel_ds in selected_dses.items():
            if args.aggro_strip:
                with pd.option_context("mode.chained_assignment", None):
                    sel_ds.aggressively_strip()
            sel_ds.to_pytables(f"{anchor}_{sel_name}.h5", to_hdf_kw=to_hdf_kw)
        return 0

    # otherwise just take the string or None
    ds = from_root(
        args.input_files,
        selection=args.selection,
        aggressively_strip=args.aggro_strip,
        **from_root_kw,
    )
    ds.to_pytables(args.out_file, to_hdf_kw=to_hdf_kw)
    return 0