Skip to content
Snippets Groups Projects
Select Git revision
  • 451d79c31e64197bc33e7ba60f25f651e352ae8b
  • master default protected
2 results

__init__.py

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    ANN_Data_Generator.py 9.65 KiB
    # -*- coding: utf-8 -*-
    """
    @author: Soraya Terrab (sorayaterrab), Laura C. Kühle
    
    """
    import os
    import timeit
    import numpy as np
    
    import DG_Approximation
    
    
    class TrainingDataGenerator(object):
        """Class for training data generator.
    
        Generate random training data for given initial conditions.
    
        Attributes
        ----------
        smooth_functions : list
            List of smooth initial/continuous conditions.
        troubled_functions : list
            List of discontinuous initial conditions.
        data_dir : str
            Path to directory in which training data is saved.
    
        Methods
        -------
        build_training_data()
            Builds random training data.
    
        """
        def __init__(self, initial_conditions, left_bound=-1, right_bound=1, balance=0.5,
                     stencil_length=3, directory='test_data'):
            """Initializes TrainingDataGenerator.
    
            Parameters
            ----------
            initial_conditions : list
                List of names of initial conditions for training.
            left_bound : float, optional
                Left boundary of interval. Default: -1.
            right_bound : float, optional
                Right boundary of interval. Default: 1.
            balance: float, optional
                Ratio between smooth and discontinuous training data. Default: 0.5.
            stencil_length : int, optional
                Size of training data array. Default: 3.
            directory : str, optional
                Path to directory in which training data is saved. Default: 'test_data'.
    
            """
            self._balance = balance
            self._left_bound = left_bound
            self._right_bound = right_bound
    
            # Set stencil length
            if stencil_length % 2 == 0:
                raise ValueError('Invalid stencil length (even value): "%d"' % stencil_length)
            self._stencil_length = stencil_length
    
            # Separate smooth and discontinuous initial conditions
            self._smooth_functions = []
            self._troubled_functions = []
            for function in initial_conditions:
                if function['function'].is_smooth():
                    self._smooth_functions.append(function)
                else:
                    self._troubled_functions.append(function)
    
            # Set directory
            self._data_dir = directory
            if not os.path.exists(self._data_dir):
                os.makedirs(self._data_dir)
    
        def build_training_data(self, num_samples):
            """Builds random training data.
    
            Creates training data consisting of random ANN input and saves it.
    
            Parameters
            ----------
            num_samples : int
                Number of training data samples to generate.
    
            Returns
            -------
            data_dict : dict
                Dictionary containing input (normalized and non-normalized) and output data.
    
            """
            tic = timeit.default_timer()
            print('Calculating training data...\n')
            data_dict = self._calculate_data_set(num_samples)
            print('Finished calculating training data!')
    
            self._save_data(data_dict)
            toc = timeit.default_timer()
            print('Total runtime:', toc-tic)
            return data_dict
    
        def _calculate_data_set(self, num_samples):
            """Calculates random training data of given stencil length.
    
            Creates training data with a given ratio between smooth and discontinuous samples and
            fixed stencil length.
    
            Parameters
            ----------
            num_samples : int
                Number of training data samples to generate.
    
            Returns
            -------
            dict
                Dictionary containing input (normalized and non-normalized) and output data.
    
            """
            num_smooth_samples = round(num_samples * self._balance)
            smooth_input, smooth_output = self._generate_cell_data(num_smooth_samples,
                                                                   self._smooth_functions, True)
    
            num_troubled_samples = num_samples - num_smooth_samples
            troubled_input, troubled_output = self._generate_cell_data(num_troubled_samples,
                                                                       self._troubled_functions, False)
    
            # Merge Data
            input_matrix = np.concatenate((smooth_input, troubled_input), axis=0)
            output_matrix = np.concatenate((smooth_output, troubled_output), axis=0)
    
            # Shuffle data while keeping correct input/output matches
            order = np.random.permutation(num_smooth_samples + num_troubled_samples)
            input_matrix = input_matrix[order]
            output_matrix = output_matrix[order]
    
            # Create normalized input data
            norm_input_matrix = self._normalize_data(input_matrix)
    
            return {'input': input_matrix, 'output': output_matrix,
                    'normalized_input': norm_input_matrix}
    
        def _generate_cell_data(self, num_samples, initial_conditions, is_smooth):
            """Generates random training input and output.
    
            Generates random training input and output for either smooth or discontinuous
            initial_conditions.
    
            Parameters
            ----------
            num_samples : int
                Number of training data samples to generate.
            initial_conditions : list
                List of names of initial conditions for training.
            is_smooth : boolean
                Flag whether initial conditions are smooth.
    
            Returns
            -------
            input_data : np.array
                Array containing input data.
            output_data : np.array
                Array containing output data.
    
            """
            troubled_indicator = 'without' if is_smooth else 'with'
            print('Calculating data ' + troubled_indicator + ' troubled cells...')
            print('Samples to complete:', num_samples)
            tic = timeit.default_timer()
    
            input_data = np.zeros((num_samples, self._stencil_length+2))
            num_init_cond = len(initial_conditions)
            count = 0
            for i in range(num_samples):
                # Select and initialize initial condition
                function_id = i % num_init_cond
                initial_condition = initial_conditions[function_id]['function']
                initial_condition.randomize(initial_conditions[function_id]['config'])
    
                # Build random stencil of given length
                interval, centers, spacing = self._build_stencil()
                left_bound, right_bound = interval
                centers = [center[0] for center in centers]
    
                # Induce adjustment to capture troubled cells
                adjustment = 0 if initial_condition.is_smooth else centers[self._stencil_length//2]
                initial_condition.induce_adjustment(-spacing[0]/3)
    
                # Calculate basis coefficients for stencil
                polynomial_degree = np.random.randint(1, high=5)
                dg_scheme = DG_Approximation.DGScheme(
                    'NoDetection', polynomial_degree=polynomial_degree,
                    num_grid_cells=self._stencil_length, left_bound=left_bound, right_bound=right_bound,
                    quadrature='Gauss', quadrature_config={'num_eval_points': polynomial_degree+1})
                input_data[i] = dg_scheme.build_training_data(adjustment, self._stencil_length,
                                                              initial_condition)
    
                count += 1
                if count % 1000 == 0:
                    print(str(count) + ' samples completed.')
    
            toc = timeit.default_timer()
            print('Finished calculating data ' + troubled_indicator + ' troubled cells!')
            print('Calculation time:', toc-tic, '\n')
    
            # Set output data
            output_data = np.zeros((num_samples, 2))
            output_index = 1 if is_smooth else 0
            output_data[:, output_index] = np.ones(num_samples)
    
            return input_data, output_data
    
        def _build_stencil(self):
            """Builds random stencil.
    
            Calculates fixed number of cell centers around a random point in a given 1D domain.
    
            Returns
            -------
            interval : np.array
                List containing left and right bound of interval.
            stencil : np.array
                List of cell centers in stencil.
            grid_spacing: float
                Length of cell in grid.
    
            """
            # Select random cell length
            grid_spacing = 2 / (2 ** np.random.randint(3, high=9, size=1))
    
            # Pick random point between left and right bound
            point = np.random.uniform(self._left_bound, self._right_bound)
    
            # Adjust grid spacing if necessary for stencil creation
            while point - self._stencil_length/2 * grid_spacing < self._left_bound\
                    or point + self._stencil_length/2 * grid_spacing > self._right_bound:
                grid_spacing = grid_spacing / 2
    
            # Build x-point stencil
            interval = np.array([point - self._stencil_length/2 * grid_spacing,
                                 point + self._stencil_length/2 * grid_spacing])
            stencil = np.array([point + factor * grid_spacing
                                for factor in range(-(self._stencil_length//2),
                                                    self._stencil_length//2 + 1)])
            return interval, stencil, grid_spacing
    
        @staticmethod
        def _normalize_data(input_data):
            """Normalize data.
    
            Parameters
            ----------
            input_data : np.array
                Array containing input data.
    
            Returns
            -------
            np.array
                Array containing normalized input data.
    
            """
            normalized_input_data = []
            for entry in input_data:
                max_function_value = max(max(np.absolute(entry)), 1)
                normalized_input_data.append(entry / max_function_value)
            return np.array(normalized_input_data)
    
        def _save_data(self, data):
            """Saves data."""
            print('Saving training data.')
            for key in data.keys():
                name = self._data_dir + '/' + key + '_data.npy'
                np.save(name, data[key])