Source code for var_cut.var_cut

"""
Estimates the signal fraction in the data set by performing a cut on a
specified variable in the MC sets.
"""
import traceback
import sys
import warnings
import numpy as np
from matplotlib import pyplot as plt
from utilities.utils import default_rootpaths, default_figpath, \
                            find_cut, stat_error, syst_error
from utilities.import_datasets import loadvars
from utilities.exceptions import IncorrectEfficiencyError, IncorrectIterableError

warnings.formatwarning = lambda msg, *args, **kwargs: f'\n{msg}\n'


[docs]def var_cut(rootpaths=default_rootpaths(), tree='t_M0pipi;1', cut_var='M0_Mpipi', eff=0.90, error_optimization=True, inverse_mode=False, specificity_mode=False, savefig=False, figpath=''): """ Estimates the signal fraction in the data set by performing a cut on a specified variable in the MC sets. :param rootpaths: Tuple containing three .root file paths, for the "background" set (flag=0), the "signal" set (flag=1) and the data one, in this order. :type rootpaths: tuple[str] :param tree: Name of the tree from which to load the events. :type tree: str :param cut_var: Variable to load and test. :type cut_var: str :param eff: Sensitivity required from the test (specificity in specificity mode). :type eff: float :param error_optimization: Performs error optimization instead of using a fixed efficiency value. :type error_optimization: bool :param inverse_mode: Set to ``True`` if the signal events have lower values of the cut variable than the background ones (if it is not manually set when needed to make the cut unbiased, the function gives a warning and does so by itself). :type inverse_mode: bool :param specificity_mode: Set to ``True`` if the efficiency given is the specificity. :type specificity_mode: bool :param savefig: Draw the figure of the variable distribution for the two MC sets and the cut performed. :type savefig: bool :param figpath: Path to where to save the figure. :type figpath: str :return: Estimated fraction of signal (with uncertainties), parameters of the cut algorithm and MC sets of the variable used. :rtype: tuple[float], tuple[float], tuple[numpy.array[float]] """ try: if (type(rootpaths) is not list and type(rootpaths) is not tuple): raise IncorrectIterableError(rootpaths, 3, 'rootpaths') elif len(rootpaths) < 3: raise IncorrectIterableError(rootpaths, 3, 'rootpaths') except IncorrectIterableError: print(traceback.format_exc()) sys.exit() if len(rootpaths) >= 4: msg = '***WARNING*** \nRootpaths given are more than three. \ Using only the first three...\n*************\n' warnings.warn(msg, stacklevel=2) rootpaths = rootpaths[:3] var_pi, var_k = loadvars( rootpaths[0], rootpaths[1], tree, [cut_var], flag_column=False) var_data, _ = loadvars( rootpaths[2], rootpaths[2], tree, [cut_var], flag_column=False) used_eff = 0 df_opt = -99999 try: if type(eff) is not float: raise IncorrectEfficiencyError(eff) elif eff <= 0 or eff >= 1: raise IncorrectEfficiencyError(eff) except IncorrectEfficiencyError: print(traceback.format_exc()) sys.exit() if error_optimization is True: # Enables FOM maximization efficiencies = np.linspace(0.25, 0.999, 200) for tmp_eff in efficiencies: tmp_cut, tmp_misid = find_cut(var_pi, var_k, tmp_eff, inverse_mode=inverse_mode, specificity_mode=False) tmp_frac = ((var_data < tmp_cut).sum()/var_data.size-tmp_misid)/(tmp_eff - tmp_misid) \ if inverse_mode else ((var_data > tmp_cut).sum()/var_data.size-tmp_misid)/(tmp_eff - tmp_misid) tmp_dfopt = -np.sqrt( stat_error(tmp_frac, var_data.size, tmp_eff, tmp_misid)**2 + syst_error(tmp_frac, (var_pi.size, var_k.size), tmp_eff, tmp_misid)**2) if tmp_dfopt >= df_opt: df_opt = tmp_dfopt used_eff = tmp_eff else: used_eff = eff cut, misid = find_cut(var_pi, var_k, used_eff, inverse_mode=inverse_mode, specificity_mode=specificity_mode) if (specificity_mode is not True and misid > used_eff) or \ (specificity_mode is True and 1 - used_eff > misid): msg = f'***WARNING***\nInverse mode was called as {inverse_mode} in \ var_cut, but the test is not unbiased this way; switching to \ inverse_mode = {not inverse_mode} \n*************\n' warnings.warn(msg, stacklevel=2) inverse_mode = not inverse_mode cut, misid = find_cut(var_pi, var_k, used_eff, inverse_mode=inverse_mode, specificity_mode=specificity_mode) if savefig: nbins = 300 range = (min(min(var_pi), min(var_k)), max(max(var_pi), max(var_k))) plt.figure('Cut on ' + cut_var) plt.hist(var_pi, nbins, range=range, histtype='step', color='red', label=f'{cut_var} for Pions') plt.hist(var_k, nbins, range=range, histtype='step', color='blue', label=f'{cut_var} for Kaons') plt.axvline(x=cut, color='green', label=f'{cut_var} Cut for {round(used_eff,3)} efficiency') plt.title(f'Varibale Cut on {cut_var}') plt.xlabel(cut_var) plt.ylabel( f'Events per {round((range[1]-range[0])/nbins, 4)} [{cut_var}]') plt.legend() plt.draw() plt.savefig(default_figpath(f'cut_{cut_var}')) \ if figpath == '' else plt.savefig(f'{figpath}/cut_{cut_var}.png') fraction = ((var_data < cut).sum()/var_data.size - misid)/(used_eff - misid) \ if inverse_mode else ((var_data > cut).sum()/var_data.size-misid)/(used_eff - misid) if specificity_mode: used_eff, misid = misid, 1-used_eff fraction = ((var_data < cut).sum()/var_data.size - misid)/(used_eff - misid) \ if inverse_mode else ((var_data > cut).sum()/var_data.size-misid)/(used_eff - misid) df_stat = stat_error(fraction, var_data.size, used_eff, misid) df_syst = syst_error(fraction, (var_pi.size, var_k.size), used_eff, misid) fr = (fraction, df_stat, df_syst) print(f'\n{cut_var} cut is {cut} for {used_eff} efficiency') print( f'Misid is {misid} +- {np.sqrt(misid*(1-misid)/var_pi.size)} for {used_eff} efficiency') print( f'The estimated fraction of K events is {fraction} +- {df_stat} (stat) +- {df_syst} (syst)') cut_parameters = (used_eff, misid, cut) eval_arrays = (var_pi, var_k) return fr, cut_parameters, eval_arrays
if __name__ == '__main__': print('Running this module as main module is not supported. Feel free to \ add a custom main or run the package as a whole (see README.md)')