Skip to content
Snippets Groups Projects
Commit 5985ab19 authored by Laura Christine Kühle
Browse files

Removed training data splitting during generation.

parent 2752f428
No related branches found
No related tags found
No related merge requests found
...@@ -3,6 +3,9 @@ ...@@ -3,6 +3,9 @@
@author: Soraya Terrab (sorayaterrab), Laura C. Kühle @author: Soraya Terrab (sorayaterrab), Laura C. Kühle
TODO: Improve '_generate_cell_data' TODO: Improve '_generate_cell_data'
TODO: Extract normalization (At all? Over smooth and troubled separately?)
TODO: Discontinue data splitting -> Done
TODO: Improve verbose output
""" """
...@@ -15,7 +18,7 @@ import DG_Approximation ...@@ -15,7 +18,7 @@ import DG_Approximation
class TrainingDataGenerator(object): class TrainingDataGenerator(object):
def __init__(self, initial_conditions, left_bound=-1, right_bound=1, balance=0.5, def __init__(self, initial_conditions, left_bound=-1, right_bound=1, balance=0.5,
stencil_length=3, distribution=None, directory=None): stencil_length=3, directory=None):
self._balance = balance self._balance = balance
self._left_bound = left_bound self._left_bound = left_bound
self._right_bound = right_bound self._right_bound = right_bound
...@@ -25,13 +28,6 @@ class TrainingDataGenerator(object): ...@@ -25,13 +28,6 @@ class TrainingDataGenerator(object):
raise ValueError('Invalid stencil length (even value): "%d"' % stencil_length) raise ValueError('Invalid stencil length (even value): "%d"' % stencil_length)
self._stencil_length = stencil_length self._stencil_length = stencil_length
# Set distribution values
self._distribution = {'train': 1.0}
if distribution is not None:
if sum([distribution[key] for key in distribution]) != 1.0:
raise ValueError('Invalid distribution directory: "%s"' % distribution)
self._distribution = distribution
# Separate smooth and discontinuous initial conditions # Separate smooth and discontinuous initial conditions
self._smooth_functions = [] self._smooth_functions = []
self._troubled_functions = [] self._troubled_functions = []
...@@ -42,35 +38,31 @@ class TrainingDataGenerator(object): ...@@ -42,35 +38,31 @@ class TrainingDataGenerator(object):
self._troubled_functions.append(function) self._troubled_functions.append(function)
# Set directory # Set directory
self._data_dir = 'data' self._data_dir = 'test_data'
if directory is not None: if directory is not None:
self._data_dir = directory self._data_dir = directory
if not os.path.exists(self._data_dir): if not os.path.exists(self._data_dir):
os.makedirs(self._data_dir) os.makedirs(self._data_dir)
def build_training_data(self, num_samples, normalize): def build_training_data(self, num_samples, normalize):
data = {} print('Calculating training data...')
for set_name in self._distribution: input_data, output_data = self._calculate_data_set(num_samples, normalize)
print('Calculating ' + set_name + ' data...') data = [input_data, output_data]
input_data, output_data = self._calculate_data_set( print('Finished calculating training data!')
round(self._distribution[set_name]*num_samples), normalize)
data[set_name] = [input_data, output_data]
print('Finished calculating ' + set_name + ' data!')
self._save_data(data, num_samples, normalize) self._save_data(data, num_samples, normalize)
return data return data
def _save_data(self, data, num_samples, normalize): def _save_data(self, data, num_samples, normalize):
for key in data.keys():
num_set_samples = round(num_samples*self._distribution[key])
normalize_string = 'non-' if not normalize else '' normalize_string = 'non-' if not normalize else ''
name = '__smooth_' + str((num_set_samples*self._balance)/1000) + 'k__troubled_' \ name = '__smooth_' + str((num_samples*self._balance)/1000) + 'k__troubled_' \
+ str((num_set_samples*(1-self._balance))/1000)\ + str((num_samples*(1-self._balance))/1000)\
+ 'k__' + normalize_string + 'normalized.npy' + 'k__' + normalize_string + 'normalized.npy'
input_name = self._data_dir + '/' + key + '_input' + name
np.save(input_name, data[key][0]) input_name = self._data_dir + '/training_input' + name
output_name = self._data_dir + '/' + key + '_output' + name np.save(input_name, data[0])
np.save(output_name, data[key][1]) output_name = self._data_dir + '/training_output' + name
np.save(output_name, data[1])
def _calculate_data_set(self, num_samples, normalize): def _calculate_data_set(self, num_samples, normalize):
num_smooth_samples = round(num_samples * self._balance) num_smooth_samples = round(num_samples * self._balance)
...@@ -201,11 +193,9 @@ functions = [{'function': Initial_Condition.Sine(boundary[0], boundary[1], {}), ...@@ -201,11 +193,9 @@ functions = [{'function': Initial_Condition.Sine(boundary[0], boundary[1], {}),
{'function': Initial_Condition.HeavisideTwoSided(boundary[0], boundary[1], {}), {'function': Initial_Condition.HeavisideTwoSided(boundary[0], boundary[1], {}),
'config': {'adjustment': 0}}] 'config': {'adjustment': 0}}]
generator = TrainingDataGenerator( generator = TrainingDataGenerator(functions, left_bound=boundary[0], right_bound=boundary[1])
functions, distribution={'train': 0.727, 'valid': 0.243, 'test': 0.03}, left_bound=boundary[0],
right_bound=boundary[1])
# generator = TrainingDataGenerator(functions, left_bound=boundary[0], right_bound=boundary[1]) # generator = TrainingDataGenerator(functions, left_bound=boundary[0], right_bound=boundary[1])
sample_number = 66000 sample_number = 1000
data_1 = generator.build_training_data(sample_number, 0) data_1 = generator.build_training_data(sample_number, 0)
data_2 = generator.build_training_data(sample_number, 1) # data_2 = generator.build_training_data(sample_number, 1)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment