From 89aa2bb800d3bb881784a5ebf17e2e9927a74010 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?K=C3=BChle=2C=20Laura=20Christine=20=28lakue103=29?=
 <laura.kuehle@uni-duesseldorf.de>
Date: Tue, 6 Sep 2022 15:18:00 +0200
Subject: [PATCH] Restructured Boxplot method.

---
 Troubled_Cell_Detector.py | 85 +++++++++++----------------------------
 1 file changed, 23 insertions(+), 62 deletions(-)

diff --git a/Troubled_Cell_Detector.py b/Troubled_Cell_Detector.py
index c943294..058f524 100644
--- a/Troubled_Cell_Detector.py
+++ b/Troubled_Cell_Detector.py
@@ -3,7 +3,7 @@
 @author: Laura C. Kühle, Soraya Terrab (sorayaterrab)
 
 TODO: Vectorize _get_cells() in Boxplot method -> Done
-TODO: Restructure Boxplot method
+TODO: Restructure Boxplot method -> Done
 TODO: Introduce lower/upper extreme outliers in Boxplot
     (each cell is also checked for neighboring domains if existing)
 TODO: Determine max_value for Theoretical only over highest degree
@@ -340,8 +340,6 @@ class Boxplot(WaveletDetector):
         Length of Boxplot whiskers.
     adjust_outer_fences : bool
         Flag whether outer fences should be adjusted using global mean.
-    num_overlapping_cells : int
-        Number of cells overlapping with adjacent folds.
     folds : ndarray
         Array with indices for elements of each fold (including
         overlaps).
@@ -362,19 +360,23 @@ class Boxplot(WaveletDetector):
         self._fold_len = config.pop('fold_len', 16)
         self._whisker_len = config.pop('whisker_len', 3)
         self._adjust_outer_fences = config.pop('adjust_outer_fences', True)
-        self._num_overlapping_cells = config.pop('num_overlapping_cells', 1)
+
+        if self._mesh.num_grid_cells < self._fold_len:
+            self._fold_len = self._mesh.num_grid_cells
+
+        num_overlapping_cells = config.pop('num_overlapping_cells', 1)
         num_folds = self._mesh.num_grid_cells//self._fold_len
-        self._folds = np.zeros([num_folds, self._fold_len
-                                + 2 * self._num_overlapping_cells]).astype(int)
+        self._fold_indices = np.zeros([num_folds,
+                                       self._fold_len + 2 *
+                                       num_overlapping_cells]).astype(np.int32)
         for fold in range(num_folds):
-            self._folds[fold] = np.array(
+            self._fold_indices[fold] = np.array(
                 [i % self._mesh.num_grid_cells for i in range(
-                    fold * self._fold_len - self._num_overlapping_cells,
-                    (fold+1) * self._fold_len + self._num_overlapping_cells)])
-        # print(self._folds)
+                    fold * self._fold_len - num_overlapping_cells,
+                    (fold+1) * self._fold_len + num_overlapping_cells)])
 
     def _get_cells(self, multiwavelet_coeffs, projection):
-        """Calculates troubled cells using multiwavelet coefficients.
+        """Calculate troubled cells using multiwavelet coefficients.
 
         Parameters
         ----------
@@ -386,19 +388,19 @@ class Boxplot(WaveletDetector):
         Returns
         -------
         list
-            List of indices for all detected troubled cells.
+            List of indices of all detected troubled cells.
 
         """
+        # Select coefficients of each fold and sort them in ascending order
         coeffs = multiwavelet_coeffs[0]
+        folds = coeffs[self._fold_indices]
+        folds.sort()
 
-        if self._mesh.num_grid_cells < self._fold_len:
-            self._fold_len = self._mesh.num_grid_cells
-
+        # Determine quartile index and interpolation weight
         boundary_index = self._fold_len//4
         balance_factor = self._fold_len/4.0 - boundary_index
 
-        folds = coeffs[self._folds]
-        folds.sort()
+        # Determine bounds based on first and third quartiles of a boxplot
         first_quartiles = (1-balance_factor) \
             * folds[:, boundary_index-1] \
             + balance_factor * folds[:, boundary_index]
@@ -411,59 +413,18 @@ class Boxplot(WaveletDetector):
         upper_bounds = third_quartiles + self._whisker_len * (
                 third_quartiles-first_quartiles)
 
+        # Widen fences to at least the global mean magnitude if flag is set
         if self._adjust_outer_fences:
             global_mean = np.mean(abs(coeffs))
             lower_bounds[lower_bounds > -global_mean] = -global_mean
             upper_bounds[upper_bounds < global_mean] = global_mean
 
-        troubled_cells_new = np.flatnonzero(np.logical_or(
+        # Select outliers as troubled cells
+        troubled_cells = np.flatnonzero(np.logical_or(
             coeffs < np.repeat(lower_bounds, self._fold_len),
             coeffs > np.repeat(upper_bounds, self._fold_len))).tolist()
 
-        # num_folds = self._mesh.num_grid_cells//self._fold_len
-        # troubled_cells = []
-        #
-        # lower_bound = np.zeros(num_folds)
-        # upper_bound = np.zeros(num_folds)
-        #
-        # for fold in range(num_folds):
-        #     sorted_fold = sorted(coeffs[self._folds[fold]])
-        #
-        #     first_quartile = (1-balance_factor) \
-        #         * sorted_fold[boundary_index-1] \
-        #         + balance_factor * sorted_fold[boundary_index]
-        #     third_quartile = (1-balance_factor) \
-        #         * sorted_fold[3*boundary_index-1]\
-        #         + balance_factor * sorted_fold[3*boundary_index]
-        #
-        #     lower_bound[fold] = first_quartile \
-        #         - self._whisker_len * (third_quartile-first_quartile)
-        #     upper_bound[fold] = third_quartile \
-        #         + self._whisker_len * (third_quartile-first_quartile)
-        #
-        #     # Adjust outer fences if flag is set
-        #     if self._adjust_outer_fences:
-        #         global_mean = np.mean(abs(coeffs))
-        #         lower_bound[fold] = min(-global_mean, lower_bound[fold])
-        #         upper_bound[fold] = max(global_mean, upper_bound[fold])
-        #
-        #     # Check for extreme outlier and add respective cells
-        #     for cell in self._folds[
-        #                 fold, self._num_overlapping_cells:
-        #                 -self._num_overlapping_cells]:
-        #         if (coeffs[cell] > upper_bound[fold]) \
-        #                 or (coeffs[cell] < lower_bound[fold]):
-        #             troubled_cells.append(int(cell))
-        #
-        # same = np.all(sorted(troubled_cells) == troubled_cells_new)
-        # if not same:
-        #     print(np.all(lower_bounds == lower_bound),
-        #           np.all(upper_bounds == upper_bound))
-        #     print(sorted(troubled_cells))
-        #     print(troubled_cells_new)
-
-        return troubled_cells_new
-        # return sorted(troubled_cells)
+        return troubled_cells
 
 
 class Theoretical(WaveletDetector):
-- 
GitLab