Skip to content
Snippets Groups Projects
Commit 5985ab19 authored by Laura Christine Kühle's avatar Laura Christine Kühle
Browse files

Removed training data splitting during generation.

parent 2752f428
No related branches found
No related tags found
No related merge requests found
......@@ -3,6 +3,9 @@
@author: Soraya Terrab (sorayaterrab), Laura C. Kühle
TODO: Improve '_generate_cell_data'
TODO: Extract normalization (At all? Over smooth and troubled separately?)
TODO: Discontinue data splitting -> Done
TODO: Improve verbose output
"""
......@@ -15,7 +18,7 @@ import DG_Approximation
class TrainingDataGenerator(object):
def __init__(self, initial_conditions, left_bound=-1, right_bound=1, balance=0.5,
stencil_length=3, distribution=None, directory=None):
stencil_length=3, directory=None):
self._balance = balance
self._left_bound = left_bound
self._right_bound = right_bound
......@@ -25,13 +28,6 @@ class TrainingDataGenerator(object):
raise ValueError('Invalid stencil length (even value): "%d"' % stencil_length)
self._stencil_length = stencil_length
# Set distribution values
self._distribution = {'train': 1.0}
if distribution is not None:
if sum([distribution[key] for key in distribution]) != 1.0:
raise ValueError('Invalid distribution directory: "%s"' % distribution)
self._distribution = distribution
# Separate smooth and discontinuous initial conditions
self._smooth_functions = []
self._troubled_functions = []
......@@ -42,35 +38,31 @@ class TrainingDataGenerator(object):
self._troubled_functions.append(function)
# Set directory
self._data_dir = 'data'
self._data_dir = 'test_data'
if directory is not None:
self._data_dir = directory
if not os.path.exists(self._data_dir):
os.makedirs(self._data_dir)
def build_training_data(self, num_samples, normalize):
    """Build and persist one training data set.

    The diff residue here interleaved the pre-commit body (which split the
    samples across a 'train'/'valid'/'test' distribution) with the
    post-commit body; per the commit message ("Removed training data
    splitting during generation"), only the single-set version is kept.

    Parameters
    ----------
    num_samples : int
        Total number of samples to generate.
    normalize : bool or int
        Truthy to normalize the generated input data.

    Returns
    -------
    list
        Two-element list ``[input_data, output_data]`` as produced by
        ``_calculate_data_set``.
    """
    print('Calculating training data...')
    input_data, output_data = self._calculate_data_set(num_samples, normalize)
    data = [input_data, output_data]
    print('Finished calculating training data!')

    # Persist alongside returning, so callers can use the data immediately.
    self._save_data(data, num_samples, normalize)
    return data
def _save_data(self, data, num_samples, normalize):
for key in data.keys():
num_set_samples = round(num_samples*self._distribution[key])
normalize_string = 'non-' if not normalize else ''
name = '__smooth_' + str((num_set_samples*self._balance)/1000) + 'k__troubled_' \
+ str((num_set_samples*(1-self._balance))/1000)\
name = '__smooth_' + str((num_samples*self._balance)/1000) + 'k__troubled_' \
+ str((num_samples*(1-self._balance))/1000)\
+ 'k__' + normalize_string + 'normalized.npy'
input_name = self._data_dir + '/' + key + '_input' + name
np.save(input_name, data[key][0])
output_name = self._data_dir + '/' + key + '_output' + name
np.save(output_name, data[key][1])
input_name = self._data_dir + '/training_input' + name
np.save(input_name, data[0])
output_name = self._data_dir + '/training_output' + name
np.save(output_name, data[1])
def _calculate_data_set(self, num_samples, normalize):
num_smooth_samples = round(num_samples * self._balance)
......@@ -201,11 +193,9 @@ functions = [{'function': Initial_Condition.Sine(boundary[0], boundary[1], {}),
{'function': Initial_Condition.HeavisideTwoSided(boundary[0], boundary[1], {}),
'config': {'adjustment': 0}}]
generator = TrainingDataGenerator(
functions, distribution={'train': 0.727, 'valid': 0.243, 'test': 0.03}, left_bound=boundary[0],
right_bound=boundary[1])
generator = TrainingDataGenerator(functions, left_bound=boundary[0], right_bound=boundary[1])
# generator = TrainingDataGenerator(functions, left_bound=boundary[0], right_bound=boundary[1])
sample_number = 66000
sample_number = 1000
data_1 = generator.build_training_data(sample_number, 0)
data_2 = generator.build_training_data(sample_number, 1)
# data_2 = generator.build_training_data(sample_number, 1)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment