Source code for pywicta.denoising.abstract_cleaning_algorithm

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Copyright (c) 2016 Jérémie DECOCK (http://www.jdhp.org)

# This script is provided under the terms and conditions of the MIT license:
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

import copy
import datetime
import json
import os
import numpy as np
import random
import sys
import time
import traceback

import astropy.units as u

from pywicta.benchmark import assess
from pywicta.image.hillas_parameters import get_hillas_parameters

from pywi.processing.filtering.pixel_clusters import filter_pixels_clusters_stats
from pywi.processing.filtering.pixel_clusters import number_of_pixels_clusters

from pywicta.image.signal_to_border_distance import signal_to_border
from pywicta.image.signal_to_border_distance import signal_to_border_distance
from pywicta.image.signal_to_border_distance import pemax_on_border
from pywicta.io import geometry_converter
from pywicta.io.images import image_generator
import pywicta.io.images

HILLAS_IMPLEMENTATION = 2      # TODO

###############################################################################

[docs]class AbstractCleaningAlgorithm(object):
    """A convenient optional wrapper to simplify the image cleaning analysis.
    
    Common processing to run and assess the image cleaning procedure on a set
    of images and save results. This class gather some common procedures to
    avoid code duplication in image cleaning modules:

    - call the cleaning algorithm on an image set;
    - assess the cleaning procedure using a set of estimators;
    - apply various pre-processing and post-processing procedures (e.g.
      geometry conversion);
    - collect and save metadata, results and intermediate values that are
      useful for analysis;
    - measure and save the execution time;
    - manage exceptions;
    - ...
    
    This abstract class is supposed to be inherited by the others image
    cleaning classes."""

    def __init__(self):
        self.label = "Unknown"  # Name to show in plots
        self.verbose = False    # Debug mode

    def __call__(self, *pargs, **kargs):
        return self.clean_image(*pargs, **kargs)

    def __str__(self):
        return "{}".format(self.algorithm_label)

[docs]    def run(self,
            cleaning_function_params,
            input_file_or_dir_path_list,
            benchmark_method,
            output_file_path,
            plot=False,
            saveplot=None,
            ref_img_as_input=False,     # A hack to easily produce CSV files...
            max_num_img=None,
            tel_id=None,
            event_id=None,
            cam_id=None,
            debug=False,
            rejection_criteria=None):
        """A convenient optional wrapper to simplify the image cleaning analysis.

        Apply the image cleaning analysis on `input_file_or_dir_path_list`,
        apply some pre-processing and post-processing procedures, collect and
        return results, intermediate values and metadata.
        
        Parameters
        ----------
        cleaning_function_params
            A dictionary containing the parameters required for the image
            cleaning method.
        input_file_or_dir_path_list
            A list of file to clean. Can be a list of simtel files, fits files
            or directories containing such files.
        benchmark_method
            The list of estimators to use to assess the image cleaning. If
            `None`, images are cleaned but nothing is returned (can be used
            with e.g. the `plot` and/or `saveplot` options).
        output_file_path
            The result file path (a JSON file).
        plot
            The result of each cleaning is plot if `True`.
        saveplot
            The result of each cleaning is saved if `True`.
        ref_img_as_input
            This option is a hack to easily produce a "flatten" CSV results
            files.
        max_num_img
            The number of images to process among the input set
            (`input_file_or_dir_path_list`).
        debug
            Stop the execution and print the full traceback when an exception
            is encountered if this parameter is `True`. Report exceptions and
            continue with the next input image if this parameter is `False`.
        
        Returns
        -------
        dict
            Results, intermediate values and metadata.
        """

        launch_time = time.perf_counter()

        if benchmark_method is not None:
            io_list = []           # The list of returned dictionaries

        if tel_id is not None:
            tel_id = [tel_id]

        if event_id is not None:
            event_id = [event_id]

        if cam_id is None:
            raise ValueError('cam_id is now mandatory.')

        if cam_id == "ASTRICam":
            integrator_window_width = 1
            integrator_window_shift = 1
        elif cam_id == "CHEC":
            integrator_window_width = 10
            integrator_window_shift = 5
        elif cam_id == "DigiCam":
            integrator_window_width = 5
            integrator_window_shift = 2
        elif cam_id == "FlashCam":
            integrator_window_width = 6
            integrator_window_shift = 3
        elif cam_id == "NectarCam":
            integrator_window_width = 5
            integrator_window_shift = 2
        elif cam_id == "LSTCam":
            integrator_window_width = 5
            integrator_window_shift = 2
        else:
            raise ValueError('Unknown cam_id "{}"'.format(cam_id))

        for image in image_generator(input_file_or_dir_path_list,
                                     max_num_images=max_num_img,
                                     tel_filter_list=tel_id,
                                     ev_filter_list=event_id,
                                     cam_filter_list=[cam_id],
                                     rejection_criteria=rejection_criteria,
                                     ctapipe_format=False,
                                     integrator='LocalPeakIntegrator',
                                     integrator_window_width=integrator_window_width,
                                     integrator_window_shift=integrator_window_shift,
                                     integration_correction=False,
                                     mix_channels=True):

            input_file_path = image.meta['file_path']

            if self.verbose:
                print(input_file_path)

            # `image_dict` contains metadata (to be returned) on the current image
            image_dict = {"input_file_path": input_file_path}

            try:
                # READ THE INPUT FILE #####################################

                if self.verbose:
                    print("TEL{}_EV{}".format(image.meta["tel_id"],
                                              image.meta["event_id"]))

                reference_img = image.reference_image
                pixels_position = image.pixels_position

                if ref_img_as_input:
                    # This option is a hack to easily produce CSV files with
                    # the "null_ref" "cleaning" module...
                    input_img = copy.deepcopy(reference_img)
                else:
                    input_img = image.input_image

                image_dict.update(image.meta)

                # Make the original 1D geom (required for the 2D to 1D geometry
                # conversion, for Tailcut and for Hillas)
                cam_id = image.meta['cam_id']
                cleaning_function_params["cam_id"] = cam_id
                geom1d = geometry_converter.get_geom1d(cam_id)

                if benchmark_method is not None:

                    # FETCH ADDITIONAL IMAGE METADATA #####################

                    image_dict["img_ref_signal_to_border"] = signal_to_border(reference_img)                   # TODO: NaN
                    image_dict["img_ref_signal_to_border_distance"] = signal_to_border_distance(reference_img) # TODO: NaN
                    image_dict["img_ref_pemax_on_border"] = pemax_on_border(reference_img)                     # TODO: NaN

                    delta_pe, delta_abs_pe, delta_num_pixels = filter_pixels_clusters_stats(reference_img)       # TODO: NaN
                    num_islands = number_of_pixels_clusters(reference_img)                                             # TODO: NaN

                    image_dict["img_ref_islands_delta_pe"] = delta_pe
                    image_dict["img_ref_islands_delta_abs_pe"] = delta_abs_pe
                    image_dict["img_ref_islands_delta_num_pixels"] = delta_num_pixels
                    image_dict["img_ref_num_islands"] = num_islands

                    image_dict["img_ref_sum_pe"] = float(np.nansum(reference_img))
                    image_dict["img_ref_min_pe"] = float(np.nanmin(reference_img))
                    image_dict["img_ref_max_pe"] = float(np.nanmax(reference_img))
                    image_dict["img_ref_num_pix"] = int( (reference_img[np.isfinite(reference_img)] > 0).sum() )

                    image_dict["img_in_sum_pe"] = float(np.nansum(input_img))
                    image_dict["img_in_min_pe"] = float(np.nanmin(input_img))
                    image_dict["img_in_max_pe"] = float(np.nanmax(input_img))
                    image_dict["img_in_num_pix"] = int( (input_img[np.isfinite(input_img)] > 0).sum() )

                    reference_img1d = geometry_converter.image_2d_to_1d(reference_img, cam_id)
                    try:
                        hillas_params_2_ref_img = get_hillas_parameters(geom1d, reference_img1d, HILLAS_IMPLEMENTATION)   # TODO GEOM
                    except Exception as e:
                        hillas_params_2_ref_img = float('nan')
                        print(e)
                        #traceback.print_tb(e.__traceback__, file=sys.stdout)

                    image_dict["img_ref_hillas_2_size"] =     float(hillas_params_2_ref_img.size)
                    image_dict["img_ref_hillas_2_cen_x"] =    hillas_params_2_ref_img.cen_x.value
                    image_dict["img_ref_hillas_2_cen_y"] =    hillas_params_2_ref_img.cen_y.value
                    image_dict["img_ref_hillas_2_length"] =   hillas_params_2_ref_img.length.value
                    image_dict["img_ref_hillas_2_width"] =    hillas_params_2_ref_img.width.value
                    image_dict["img_ref_hillas_2_r"] =        hillas_params_2_ref_img.r.value
                    image_dict["img_ref_hillas_2_phi"] =      hillas_params_2_ref_img.phi.to(u.rad).value
                    image_dict["img_ref_hillas_2_psi"] =      hillas_params_2_ref_img.psi.to(u.rad).value
                    try:
                        image_dict["img_ref_hillas_2_miss"] = float(hillas_params_2_ref_img.miss.value)
                    except:
                        image_dict["img_ref_hillas_2_miss"] = None
                    image_dict["img_ref_hillas_2_kurtosis"] = hillas_params_2_ref_img.kurtosis
                    image_dict["img_ref_hillas_2_skewness"] = hillas_params_2_ref_img.skewness

                # CLEAN THE INPUT IMAGE ###################################

                # Copy the image (otherwise some cleaning functions like Tailcut may change it)
                #input_img_copy = copy.deepcopy(input_img)
                input_img_copy = input_img.astype('float64', copy=True)

                cleaning_function_params["output_data_dict"] = {}

                initial_time = time.perf_counter()
                cleaned_img = self.clean_image(input_img_copy, **cleaning_function_params)   # TODO: NaN
                full_clean_execution_time_sec = time.perf_counter() - initial_time

                if benchmark_method is not None:
                    image_dict.update(cleaning_function_params["output_data_dict"])
                    del cleaning_function_params["output_data_dict"]

                # ASSESS OR PRINT THE CLEANED IMAGE #######################

                if benchmark_method is not None:

                    # ASSESS THE CLEANING #################################

                    kwargs = {'geom': geom1d,
                              'hillas_implementation': HILLAS_IMPLEMENTATION}  # TODO GEOM
                    score_tuple, score_name_tuple = assess.assess_image_cleaning(input_img,
                                                                                 cleaned_img,
                                                                                 reference_img,
                                                                                 benchmark_method,
                                                                                 **kwargs)

                    image_dict["img_cleaned_signal_to_border"] = signal_to_border(cleaned_img)
                    image_dict["img_cleaned_signal_to_border_distance"] = signal_to_border_distance(cleaned_img)
                    image_dict["img_cleaned_pemax_on_border"] = pemax_on_border(cleaned_img)

                    image_dict["score"] = score_tuple
                    image_dict["score_name"] = score_name_tuple
                    image_dict["full_clean_execution_time_sec"] = full_clean_execution_time_sec

                    image_dict["img_cleaned_sum_pe"] = float(np.nansum(cleaned_img))
                    image_dict["img_cleaned_min_pe"] = float(np.nanmin(cleaned_img))
                    image_dict["img_cleaned_max_pe"] = float(np.nanmax(cleaned_img))
                    image_dict["img_cleaned_num_pix"] = int( (cleaned_img[np.isfinite(cleaned_img)] > 0).sum() )

                    cleaned_img1d = geometry_converter.image_2d_to_1d(cleaned_img, cam_id)
                    try:
                        hillas_params_2_cleaned_img = get_hillas_parameters(geom1d, cleaned_img1d, HILLAS_IMPLEMENTATION)    # GEOM
                    except Exception as e:
                        hillas_params_2_cleaned_img = float('nan')
                        print(e)
                        #traceback.print_tb(e.__traceback__, file=sys.stdout)

                    try:
                        image_dict["img_cleaned_hillas_2_size"] =     float(hillas_params_2_cleaned_img.size)
                    except AttributeError as e:
                        print(e)
                        image_dict["img_cleaned_hillas_2_size"] =     float('nan')

                    try:
                        image_dict["img_cleaned_hillas_2_cen_x"] =    hillas_params_2_cleaned_img.cen_x.value
                    except AttributeError as e:
                        print(e)
                        image_dict["img_cleaned_hillas_2_cen_x"] =    float('nan')

                    try:
                        image_dict["img_cleaned_hillas_2_cen_y"] =    hillas_params_2_cleaned_img.cen_y.value
                    except AttributeError as e:
                        print(e)
                        image_dict["img_cleaned_hillas_2_cen_y"] =    float('nan')

                    try:
                        image_dict["img_cleaned_hillas_2_length"] =   hillas_params_2_cleaned_img.length.value
                    except AttributeError as e:
                        print(e)
                        image_dict["img_cleaned_hillas_2_length"] =   float('nan')

                    try:
                        image_dict["img_cleaned_hillas_2_width"] =    hillas_params_2_cleaned_img.width.value
                    except AttributeError as e:
                        print(e)
                        image_dict["img_cleaned_hillas_2_width"] =    float('nan')

                    try:
                        image_dict["img_cleaned_hillas_2_r"] =        hillas_params_2_cleaned_img.r.value
                    except AttributeError as e:
                        print(e)
                        image_dict["img_cleaned_hillas_2_r"] =        float('nan')

                    try:
                        image_dict["img_cleaned_hillas_2_phi"] =      hillas_params_2_cleaned_img.phi.to(u.rad).value
                    except AttributeError as e:
                        print(e)
                        image_dict["img_cleaned_hillas_2_phi"] =      float('nan')
                        
                    try:
                        image_dict["img_cleaned_hillas_2_psi"] =      hillas_params_2_cleaned_img.psi.to(u.rad).value
                    except AttributeError as e:
                        print(e)
                        image_dict["img_cleaned_hillas_2_psi"] =      float('nan')

                    try:
                        image_dict["img_cleaned_hillas_2_miss"] = float(hillas_params_2_cleaned_img.miss.value)
                    except:
                        image_dict["img_cleaned_hillas_2_miss"] = float('nan')

                    try:
                        image_dict["img_cleaned_hillas_2_kurtosis"] = hillas_params_2_cleaned_img.kurtosis
                    except AttributeError as e:
                        print(e)
                        image_dict["img_cleaned_hillas_2_kurtosis"] = float('nan')

                    try:
                        image_dict["img_cleaned_hillas_2_skewness"] = hillas_params_2_cleaned_img.skewness
                    except AttributeError as e:
                        print(e)
                        image_dict["img_cleaned_hillas_2_skewness"] = float('nan')

                # PLOT IMAGES #########################################################

                if plot or (saveplot is not None):
                    image_list = [geometry_converter.image_2d_to_1d(input_img, cam_id),
                                  geometry_converter.image_2d_to_1d(reference_img, cam_id),
                                  geometry_converter.image_2d_to_1d(cleaned_img, cam_id)] 
                    title_list = ["Input image", "Reference image", "Cleaned image"] 
                    geom_list = [geom1d, geom1d, geom1d] 
                    hillas_list = [False, True, True]

                    if plot:
                        pywicta.io.images.plot_list(image_list,
                                                     geom_list=geom_list,
                                                     title_list=title_list,
                                                     hillas_list=hillas_list,
                                                     metadata_dict=image.meta)

                    if saveplot is not None:
                        basename, extension = os.path.splitext(saveplot)
                        plot_file_path = "{}_E{}_T{}{}".format(basename, image.meta["event_id"], image.meta["tel_id"], extension)

                        print("Saving {}".format(plot_file_path))
                        pywicta.io.images.mpl_save_list(image_list,
                                                         geom_list=geom_list,
                                                         output_file_path=plot_file_path,
                                                         title_list=title_list,
                                                         hillas_list=hillas_list,
                                                         metadata_dict=image.meta)

            except Exception as e:
                print("Abort image {}: {} ({})".format(input_file_path, e, type(e)))

                if debug:
                    # The following line print the full trackback
                    traceback.print_tb(e.__traceback__, file=sys.stdout)

                if benchmark_method is not None:

                    # http://docs.python.org/2/library/sys.html#sys.exc_info
                    exc_type, exc_value, exc_traceback = sys.exc_info() # most recent (if any) by default

                    '''
                    Reason this _can_ be bad: If an (unhandled) exception happens AFTER this,
                    or if we do not delete the labels on (not much) older versions of Py, the
                    reference we created can linger.

                    traceback.format_exc/print_exc do this very thing, BUT note this creates a
                    temp scope within the function.
                    '''

                    error_dict = {
                                  'filename': exc_traceback.tb_frame.f_code.co_filename,
                                  'lineno'  : exc_traceback.tb_lineno,
                                  'name'    : exc_traceback.tb_frame.f_code.co_name,
                                  'type'    : exc_type.__name__,
                                  #'message' : exc_value.message
                                  'message' : str(e)
                                 }

                    del(exc_type, exc_value, exc_traceback) # So we don't leave our local labels/objects dangling
                    # This still isn't "completely safe", though!

                    #error_dict = {"type": str(type(e)),
                    #              "message": str(e)}

                    image_dict["error"] = error_dict

            finally:
                if benchmark_method is not None:
                    io_list.append(image_dict)

        if benchmark_method is not None:
            error_list = [image_dict["error"] for image_dict in io_list if "error" in image_dict]
            print("{} images aborted".format(len(error_list)))

            # GENERAL EXPERIMENT METADATA
            output_dict = {}
            output_dict["benchmark_execution_time_sec"] = str(time.perf_counter() - launch_time)
            output_dict["date_time"] = str(datetime.datetime.now())
            output_dict["class_name"] = self.__class__.__name__
            output_dict["algo_code_ref"] = str(self.__class__.clean_image.__code__)
            output_dict["label"] = self.label
            output_dict["cmd"] = " ".join(sys.argv)
            output_dict["algo_params"] = cleaning_function_params

            if "noise_distribution" in output_dict["algo_params"]:
                del output_dict["algo_params"]["noise_distribution"]  # not JSON serializable...

            output_dict["benchmark_method"] = benchmark_method
            output_dict["system"] = " ".join(os.uname())
            output_dict["io"] = io_list

            if output_file_path is not None:
                with open(output_file_path, "w") as fd:
                    json.dump(output_dict, fd, sort_keys=True, indent=4)  # pretty print format

            return output_dict