Skip to content
Snippets Groups Projects
Commit 3ce351a1 authored by Laura Christine Kühle
Browse files

Removed unnecessary instance variables from TrainingDataGenerator.

parent f3d65297
No related branches found
No related tags found
No related merge requests found
...@@ -22,50 +22,27 @@ class TrainingDataGenerator: ...@@ -22,50 +22,27 @@ class TrainingDataGenerator:
Generates random training data for given initial conditions. Generates random training data for given initial conditions.
Attributes
----------
smooth_functions : list
List of smooth initial/continuous conditions.
troubled_functions : list
List of discontinuous initial conditions.
data_dir : str
Path to directory in which training data is saved.
Methods Methods
------- -------
build_training_data(num_samples) build_training_data(num_samples)
Builds random training data. Builds random training data.
""" """
def __init__(self, initial_conditions, left_bound=-1, right_bound=1, def __init__(self, left_bound=-1, right_bound=1, stencil_length=3):
balance=0.5, stencil_length=3, directory='test_data',
add_reconstructions=True):
"""Initializes TrainingDataGenerator. """Initializes TrainingDataGenerator.
Parameters Parameters
---------- ----------
initial_conditions : list
List of names of initial conditions for training.
left_bound : float, optional left_bound : float, optional
Left boundary of interval. Default: -1. Left boundary of interval. Default: -1.
right_bound : float, optional right_bound : float, optional
Right boundary of interval. Default: 1. Right boundary of interval. Default: 1.
balance: float, optional
Ratio between smooth and discontinuous training data. Default: 0.5.
stencil_length : int, optional stencil_length : int, optional
Size of training data array. Default: 3. Size of training data array. Default: 3.
directory : str, optional
Path to directory in which training data is saved.
Default: 'test_data'.
add_reconstructions: bool, optional
Flag whether reconstructions of the middle cell are included.
Default: True.
""" """
self._balance = balance
self._left_bound = left_bound self._left_bound = left_bound
self._right_bound = right_bound self._right_bound = right_bound
self._add_reconstructions = add_reconstructions
# Set stencil length # Set stencil length
if stencil_length % 2 == 0: if stencil_length % 2 == 0:
...@@ -73,29 +50,26 @@ class TrainingDataGenerator: ...@@ -73,29 +50,26 @@ class TrainingDataGenerator:
% stencil_length) % stencil_length)
self._stencil_length = stencil_length self._stencil_length = stencil_length
# Separate smooth and discontinuous initial conditions def build_training_data(self, initial_conditions, num_samples, balance=0.5,
self._smooth_functions = [] directory='test_data', add_reconstructions=True):
self._troubled_functions = []
for function in initial_conditions:
if function['function'].is_smooth():
self._smooth_functions.append(function)
else:
self._troubled_functions.append(function)
# Set directory
self._data_dir = directory
if not os.path.exists(self._data_dir):
os.makedirs(self._data_dir)
def build_training_data(self, num_samples):
"""Builds random training data. """Builds random training data.
Creates training data consisting of random ANN input and saves it. Creates training data consisting of random ANN input and saves it.
Parameters Parameters
---------- ----------
initial_conditions : list
List of names of initial conditions for training.
num_samples : int num_samples : int
Number of training data samples to generate. Number of training data samples to generate.
balance : float, optional
Ratio between smooth and discontinuous training data. Default: 0.5.
directory : str, optional
Path to directory in which training data is saved.
Default: 'test_data'.
add_reconstructions : bool, optional
Flag whether reconstructions of the middle cell are included.
Default: True.
Returns Returns
------- -------
...@@ -106,15 +80,18 @@ class TrainingDataGenerator: ...@@ -106,15 +80,18 @@ class TrainingDataGenerator:
""" """
tic = time.perf_counter() tic = time.perf_counter()
print('Calculating training data...\n') print('Calculating training data...\n')
data_dict = self._calculate_data_set(num_samples) data_dict = self._calculate_data_set(initial_conditions,
num_samples, balance,
add_reconstructions)
print('Finished calculating training data!') print('Finished calculating training data!')
self._save_data(data_dict) self._save_data(directory=directory, data=data_dict)
toc = time.perf_counter() toc = time.perf_counter()
print(f'Total runtime: {toc - tic:0.4f}s') print(f'Total runtime: {toc - tic:0.4f}s')
return data_dict return data_dict
def _calculate_data_set(self, num_samples): def _calculate_data_set(self, initial_conditions, num_samples, balance,
add_reconstructions):
"""Calculates random training data of given stencil length. """Calculates random training data of given stencil length.
Creates training data with a given ratio between smooth and Creates training data with a given ratio between smooth and
...@@ -122,8 +99,14 @@ class TrainingDataGenerator: ...@@ -122,8 +99,14 @@ class TrainingDataGenerator:
Parameters Parameters
---------- ----------
initial_conditions : list
List of names of initial conditions for training.
num_samples : int num_samples : int
Number of training data samples to generate. Number of training data samples to generate.
balance : float
Ratio between smooth and discontinuous training data.
add_reconstructions : bool
Flag whether reconstructions of the middle cell are included.
Returns Returns
------- -------
...@@ -132,13 +115,24 @@ class TrainingDataGenerator: ...@@ -132,13 +115,24 @@ class TrainingDataGenerator:
output data. output data.
""" """
num_smooth_samples = round(num_samples * self._balance) # print(type(initial_conditions))
# Separate smooth and discontinuous initial conditions
smooth_functions = []
troubled_functions = []
for function in initial_conditions:
if function['function'].is_smooth():
smooth_functions.append(function)
else:
troubled_functions.append(function)
num_smooth_samples = round(num_samples * balance)
smooth_input, smooth_output = self._generate_cell_data( smooth_input, smooth_output = self._generate_cell_data(
num_smooth_samples, self._smooth_functions, True) num_smooth_samples, smooth_functions, add_reconstructions, True)
num_troubled_samples = num_samples - num_smooth_samples num_troubled_samples = num_samples - num_smooth_samples
troubled_input, troubled_output = self._generate_cell_data( troubled_input, troubled_output = self._generate_cell_data(
num_troubled_samples, self._troubled_functions, False) num_troubled_samples, troubled_functions, add_reconstructions,
False)
# Merge Data # Merge Data
input_matrix = np.concatenate((smooth_input, troubled_input), axis=0) input_matrix = np.concatenate((smooth_input, troubled_input), axis=0)
...@@ -157,7 +151,8 @@ class TrainingDataGenerator: ...@@ -157,7 +151,8 @@ class TrainingDataGenerator:
return {'input_data.raw': input_matrix, 'output_data': output_matrix, return {'input_data.raw': input_matrix, 'output_data': output_matrix,
'input_data.normalized': norm_input_matrix} 'input_data.normalized': norm_input_matrix}
def _generate_cell_data(self, num_samples, initial_conditions, is_smooth): def _generate_cell_data(self, num_samples, initial_conditions,
add_reconstructions, is_smooth):
"""Generates random training input and output. """Generates random training input and output.
Generates random training input and output for either smooth or Generates random training input and output for either smooth or
...@@ -170,6 +165,8 @@ class TrainingDataGenerator: ...@@ -170,6 +165,8 @@ class TrainingDataGenerator:
Number of training data samples to generate. Number of training data samples to generate.
initial_conditions : list initial_conditions : list
List of names of initial conditions for training. List of names of initial conditions for training.
add_reconstructions : bool
Flag whether reconstructions of the middle cell are included.
is_smooth : bool is_smooth : bool
Flag whether initial conditions are smooth. Flag whether initial conditions are smooth.
...@@ -181,13 +178,14 @@ class TrainingDataGenerator: ...@@ -181,13 +178,14 @@ class TrainingDataGenerator:
Array containing output data. Array containing output data.
""" """
# print(type(initial_conditions))
troubled_indicator = 'without' if is_smooth else 'with' troubled_indicator = 'without' if is_smooth else 'with'
print('Calculating data ' + troubled_indicator + ' troubled cells...') print('Calculating data ' + troubled_indicator + ' troubled cells...')
print('Samples to complete:', num_samples) print('Samples to complete:', num_samples)
tic = time.perf_counter() tic = time.perf_counter()
num_datapoints = self._stencil_length num_datapoints = self._stencil_length
if self._add_reconstructions: if add_reconstructions:
num_datapoints += 2 num_datapoints += 2
input_data = np.zeros((num_samples, num_datapoints)) input_data = np.zeros((num_samples, num_datapoints))
num_init_cond = len(initial_conditions) num_init_cond = len(initial_conditions)
...@@ -212,19 +210,19 @@ class TrainingDataGenerator: ...@@ -212,19 +210,19 @@ class TrainingDataGenerator:
# Calculate basis coefficients for stencil # Calculate basis coefficients for stencil
polynomial_degree = np.random.randint(1, high=5) polynomial_degree = np.random.randint(1, high=5)
basis = basis_list[polynomial_degree]
mesh = Mesh(num_grid_cells=self._stencil_length, num_ghost_cells=2, mesh = Mesh(num_grid_cells=self._stencil_length, num_ghost_cells=2,
left_bound=left_bound, right_bound=right_bound) left_bound=left_bound, right_bound=right_bound)
projection = do_initial_projection( projection = do_initial_projection(
initial_condition=initial_condition, mesh=mesh, initial_condition=initial_condition, mesh=mesh,
basis=basis, basis=basis_list[polynomial_degree],
quadrature=quadrature_list[polynomial_degree], quadrature=quadrature_list[polynomial_degree],
adjustment=adjustment) adjustment=adjustment)
input_data[i] = basis.calculate_cell_average( input_data[i] = basis_list[
polynomial_degree].calculate_cell_average(
projection=projection[:, 1:-1], projection=projection[:, 1:-1],
stencil_length=self._stencil_length, stencil_length=self._stencil_length,
add_reconstructions=self._add_reconstructions) add_reconstructions=add_reconstructions)
count += 1 count += 1
if count % 1000 == 0: if count % 1000 == 0:
...@@ -298,9 +296,14 @@ class TrainingDataGenerator: ...@@ -298,9 +296,14 @@ class TrainingDataGenerator:
normalized_input_data.append(entry / max_function_value) normalized_input_data.append(entry / max_function_value)
return np.array(normalized_input_data) return np.array(normalized_input_data)
def _save_data(self, data): @staticmethod
def _save_data(directory, data):
"""Saves data.""" """Saves data."""
# Set directory
if not os.path.exists(directory):
os.makedirs(directory)
print('Saving training data.') print('Saving training data.')
for key in data.keys(): for key in data.keys():
name = self._data_dir + '/' + key + '.npy' name = directory + '/' + key + '.npy'
np.save(name, data[key]) np.save(name, data[key])
...@@ -38,10 +38,9 @@ rule generate_data: ...@@ -38,10 +38,9 @@ rule generate_data:
with open(str(log), 'w') as logfile: with open(str(log), 'w') as logfile:
sys.stdout = logfile sys.stdout = logfile
generator = ANN_Data_Generator.TrainingDataGenerator( generator = ANN_Data_Generator.TrainingDataGenerator(
initial_conditions=initial_conditions,
left_bound=params.left_bound, right_bound=params.right_bound, left_bound=params.left_bound, right_bound=params.right_bound,
balance=params.balance, stencil_length=params.stencil_length)
stencil_length=params.stencil_length, directory=DIR, data = generator.build_training_data(balance=params.balance,
initial_conditions=initial_conditions, directory=DIR,
num_samples=params.sample_number,
add_reconstructions=params.reconstruction_flag) add_reconstructions=params.reconstruction_flag)
\ No newline at end of file
data = generator.build_training_data(
num_samples=params.sample_number)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment