diff --git a/ANN_Data_Generator.py b/ANN_Data_Generator.py index 629b08c352db14ca2b49d78d500696dafeb4f8e3..52d671987dea73a71497ff3dd22e23fb3f888bb6 100644 --- a/ANN_Data_Generator.py +++ b/ANN_Data_Generator.py @@ -3,6 +3,9 @@ @author: Soraya Terrab (sorayaterrab), Laura C. Kühle TODO: Improve '_generate_cell_data' +TODO: Extract normalization (At all? Over smooth and troubled separately?) +TODO: Discontinue data splitting -> Done +TODO: Improve verbose output """ @@ -15,7 +18,7 @@ import DG_Approximation class TrainingDataGenerator(object): def __init__(self, initial_conditions, left_bound=-1, right_bound=1, balance=0.5, - stencil_length=3, distribution=None, directory=None): + stencil_length=3, directory=None): self._balance = balance self._left_bound = left_bound self._right_bound = right_bound @@ -25,13 +28,6 @@ class TrainingDataGenerator(object): raise ValueError('Invalid stencil length (even value): "%d"' % stencil_length) self._stencil_length = stencil_length - # Set distribution values - self._distribution = {'train': 1.0} - if distribution is not None: - if sum([distribution[key] for key in distribution]) != 1.0: - raise ValueError('Invalid distribution directory: "%s"' % distribution) - self._distribution = distribution - # Separate smooth and discontinuous initial conditions self._smooth_functions = [] self._troubled_functions = [] @@ -42,35 +38,31 @@ class TrainingDataGenerator(object): self._troubled_functions.append(function) # Set directory - self._data_dir = 'data' + self._data_dir = 'test_data' if directory is not None: self._data_dir = directory if not os.path.exists(self._data_dir): os.makedirs(self._data_dir) def build_training_data(self, num_samples, normalize): - data = {} - for set_name in self._distribution: - print('Calculating ' + set_name + ' data...') - input_data, output_data = self._calculate_data_set( - round(self._distribution[set_name]*num_samples), normalize) - data[set_name] = [input_data, output_data] - print('Finished calculating ' + set_name + ' data!') + print('Calculating training data...') + input_data, output_data = self._calculate_data_set(num_samples, normalize) + data = [input_data, output_data] + print('Finished calculating training data!') self._save_data(data, num_samples, normalize) return data def _save_data(self, data, num_samples, normalize): - for key in data.keys(): - num_set_samples = round(num_samples*self._distribution[key]) - normalize_string = 'non-' if not normalize else '' - name = '__smooth_' + str((num_set_samples*self._balance)/1000) + 'k__troubled_' \ - + str((num_set_samples*(1-self._balance))/1000)\ - + 'k__' + normalize_string + 'normalized.npy' - input_name = self._data_dir + '/' + key + '_input' + name - np.save(input_name, data[key][0]) - output_name = self._data_dir + '/' + key + '_output' + name - np.save(output_name, data[key][1]) + normalize_string = 'non-' if not normalize else '' + name = '__smooth_' + str((num_samples*self._balance)/1000) + 'k__troubled_' \ + + str((num_samples*(1-self._balance))/1000)\ + + 'k__' + normalize_string + 'normalized.npy' + + input_name = self._data_dir + '/training_input' + name + np.save(input_name, data[0]) + output_name = self._data_dir + '/training_output' + name + np.save(output_name, data[1]) def _calculate_data_set(self, num_samples, normalize): num_smooth_samples = round(num_samples * self._balance) @@ -201,11 +193,9 @@ functions = [{'function': Initial_Condition.Sine(boundary[0], boundary[1], {}), {'function': Initial_Condition.HeavisideTwoSided(boundary[0], boundary[1], {}), 'config': {'adjustment': 0}}] -generator = TrainingDataGenerator( - functions, distribution={'train': 0.727, 'valid': 0.243, 'test': 0.03}, left_bound=boundary[0], - right_bound=boundary[1]) +generator = TrainingDataGenerator(functions, left_bound=boundary[0], right_bound=boundary[1]) # generator = TrainingDataGenerator(functions, left_bound=boundary[0], right_bound=boundary[1]) -sample_number = 66000 +sample_number = 1000 data_1 = generator.build_training_data(sample_number, 0) -data_2 = generator.build_training_data(sample_number, 1) +# data_2 = generator.build_training_data(sample_number, 1)