Source code for var_cut.var_cut

"""
Estimates the signal fraction in the data set by performing a cut on a
specified variable in the MC sets.
"""
import traceback
import sys
import warnings
import numpy as np
from matplotlib import pyplot as plt
from utilities.utils import default_rootpaths, default_figpath, \
                            find_cut, stat_error, syst_error
from utilities.import_datasets import loadvars
from utilities.exceptions import IncorrectEfficiencyError, IncorrectIterableError

warnings.formatwarning = lambda msg, *args, **kwargs: f'\n{msg}\n'


[docs]def var_cut(rootpaths=default_rootpaths(), tree='t_M0pipi;1', cut_var='M0_Mpipi',
            eff=0.90, error_optimization=True, inverse_mode=False,
            specificity_mode=False, savefig=False, figpath=''):
    """
    Estimates the signal fraction in the data set by performing a cut on a
    specified variable in the MC sets.

    :param rootpaths: Tuple containing three .root file paths, for the "background" set (flag=0), the "signal" set (flag=1) and the data one, in this order.
    :type rootpaths: tuple[str]
    :param tree: Name of the tree from which to load the events.
    :type tree: str
    :param cut_var: Variable to load and test.
    :type cut_var: str
    :param eff: Sensitivity required from the test (specificity in specificity mode).
    :type eff: float
    :param error_optimization: Performs error optimization instead of using a fixed efficiency value.
    :type error_optimization: bool
    :param inverse_mode: Set to ``True`` if the signal events have lower values of the cut variable than the background ones (if it is not manually set when needed to make the cut unbiased, the function gives a warning and does so by itself).
    :type inverse_mode: bool
    :param specificity_mode: Set to ``True`` if the efficiency given is the specificity.
    :type specificity_mode: bool
    :param savefig: Draw the figure of the variable distribution for the two MC sets and the cut performed.
    :type savefig: bool
    :param figpath: Path to where to save the figure.
    :type figpath: str
    :return: Estimated fraction of signal (with uncertainties), parameters of the cut algorithm and MC sets of the variable used.
    :rtype: tuple[float], tuple[float], tuple[numpy.array[float]]

    """

    try:
        if (type(rootpaths) is not list and type(rootpaths) is not tuple):
            raise IncorrectIterableError(rootpaths, 3, 'rootpaths')
        elif len(rootpaths) < 3:
            raise IncorrectIterableError(rootpaths, 3, 'rootpaths')
    except IncorrectIterableError:
        print(traceback.format_exc())
        sys.exit()
    if len(rootpaths) >= 4:
        msg = '***WARNING*** \nRootpaths given are more than three. \
Using only the first three...\n*************\n'
        warnings.warn(msg, stacklevel=2)
        rootpaths = rootpaths[:3]

    var_pi, var_k = loadvars(
        rootpaths[0], rootpaths[1], tree, [cut_var], flag_column=False)
    var_data, _ = loadvars(
        rootpaths[2], rootpaths[2], tree, [cut_var], flag_column=False)

    used_eff = 0
    df_opt = -99999

    try:
        if type(eff) is not float:
            raise IncorrectEfficiencyError(eff)
        elif eff <= 0 or eff >= 1:
            raise IncorrectEfficiencyError(eff)
    except IncorrectEfficiencyError:
        print(traceback.format_exc())
        sys.exit()

    if error_optimization is True:  # Enables FOM maximization
        efficiencies = np.linspace(0.25, 0.999, 200)
        for tmp_eff in efficiencies:
            tmp_cut, tmp_misid = find_cut(var_pi, var_k, tmp_eff,
                                          inverse_mode=inverse_mode,
                                          specificity_mode=False)

            tmp_frac = ((var_data < tmp_cut).sum()/var_data.size-tmp_misid)/(tmp_eff - tmp_misid) \
                if inverse_mode else ((var_data > tmp_cut).sum()/var_data.size-tmp_misid)/(tmp_eff - tmp_misid)

            tmp_dfopt = -np.sqrt(
                stat_error(tmp_frac, var_data.size, tmp_eff, tmp_misid)**2
                + syst_error(tmp_frac, (var_pi.size, var_k.size), tmp_eff, tmp_misid)**2)

            if tmp_dfopt >= df_opt:
                df_opt = tmp_dfopt
                used_eff = tmp_eff
    else:
        used_eff = eff

    cut, misid = find_cut(var_pi, var_k, used_eff, inverse_mode=inverse_mode,
                          specificity_mode=specificity_mode)

    if (specificity_mode is not True and misid > used_eff) or \
       (specificity_mode is True and 1 - used_eff > misid):
        msg = f'***WARNING***\nInverse mode was called as {inverse_mode} in \
var_cut, but the test is not unbiased this way; switching to \
inverse_mode = {not inverse_mode} \n*************\n'
        warnings.warn(msg, stacklevel=2)
        inverse_mode = not inverse_mode
        cut, misid = find_cut(var_pi, var_k, used_eff, inverse_mode=inverse_mode,
                              specificity_mode=specificity_mode)

    if savefig:
        nbins = 300
        range = (min(min(var_pi), min(var_k)), max(max(var_pi), max(var_k)))
        plt.figure('Cut on ' + cut_var)
        plt.hist(var_pi, nbins, range=range, histtype='step',
                 color='red', label=f'{cut_var} for Pions')
        plt.hist(var_k, nbins, range=range, histtype='step',
                 color='blue', label=f'{cut_var} for Kaons')
        plt.axvline(x=cut, color='green',
                    label=f'{cut_var} Cut for {round(used_eff,3)} efficiency')
        plt.title(f'Varibale Cut on {cut_var}')
        plt.xlabel(cut_var)
        plt.ylabel(
            f'Events per {round((range[1]-range[0])/nbins, 4)} [{cut_var}]')
        plt.legend()
        plt.draw()
        plt.savefig(default_figpath(f'cut_{cut_var}')) \
            if figpath == '' else plt.savefig(f'{figpath}/cut_{cut_var}.png')

    fraction = ((var_data < cut).sum()/var_data.size - misid)/(used_eff - misid) \
        if inverse_mode else ((var_data > cut).sum()/var_data.size-misid)/(used_eff - misid)

    if specificity_mode:
        used_eff, misid = misid, 1-used_eff
        fraction = ((var_data < cut).sum()/var_data.size - misid)/(used_eff - misid) \
            if inverse_mode else ((var_data > cut).sum()/var_data.size-misid)/(used_eff - misid)

    df_stat = stat_error(fraction, var_data.size, used_eff, misid)

    df_syst = syst_error(fraction, (var_pi.size, var_k.size), used_eff, misid)

    fr = (fraction, df_stat, df_syst)

    print(f'\n{cut_var} cut is {cut} for {used_eff} efficiency')
    print(
        f'Misid is {misid} +- {np.sqrt(misid*(1-misid)/var_pi.size)} for {used_eff} efficiency')
    print(
        f'The estimated fraction of K events is {fraction} +- {df_stat} (stat) +- {df_syst} (syst)')

    cut_parameters = (used_eff, misid, cut)

    eval_arrays = (var_pi, var_k)

    return fr, cut_parameters, eval_arrays


if __name__ == '__main__':
    print('Running this module as main module is not supported. Feel free to \
add a custom main or run the package as a whole (see README.md)')
Source code for var_cut.var_cut

cmepda-PiKclassifier

Navigation

Related Topics