@@ -208,59 +208,8 @@ def model_parameters(time_units, freq_units, time_scale, freq_scale):
208208 ["max" ,
209209 "average" ]], None , True ],
210210 ["denselayers" , "dense layers" , '' , '' , 1 , [], None , False ],
211- ["augment_volume" , "augment volume" , '' , '1,1' , 1 , [], None , True ],
212- ["augment_noise" , "augment noise" , '' , '0,0' , 1 , [], None , True ],
213- ["augment_dc" , "augment DC" , '' , '0,0' , 1 , [], None , True ],
214- ["augment_reverse" , "augment reverse" , ["yes" , "no" ], 'no' , 1 , [], None , True ],
215- ["augment_invert" , "augment invert" , ["yes" , "no" ], 'no' , 1 , [], None , True ],
216211 ]
217212
class Augment(tf.keras.layers.Layer):
    """Randomly perturb a batch of waveforms while training.

    The layer is the identity unless it is called with a truthy
    ``training`` flag.  When training, each enabled augmentation draws
    fresh random values on every call:

    * volume   -- multiply by a gain drawn uniformly from ``volume_range``
    * noise    -- add zero-mean Gaussian noise whose standard deviation is
                  drawn uniformly from ``noise_range``
    * baseline -- add an offset drawn uniformly from ``baseline_range``
    * reverse  -- flip axis 1 for a random half of the batch entries
    * invert   -- negate a random half of the batch entries

    Gains, noise levels and offsets are drawn once per (batch entry,
    channel) and broadcast along axis 1.  ``call`` indexes
    ``tf.shape(inputs)[0..2]``, so inputs must be at least rank 3; the
    invert mask is shaped for exactly rank 3 (presumably
    ``(batch, tics, channels)`` -- confirm against the caller).

    Args:
        volume_range: ``[lo, hi]`` gain bounds; ``[1, 1]`` disables.
        noise_range: ``[lo, hi]`` noise std-dev bounds; ``[0, 0]`` disables.
        baseline_range: ``[lo, hi]`` offset bounds; ``[0, 0]`` disables.
        reverse_bool: whether to randomly reverse entries along axis 1.
        invert_bool: whether to randomly flip the sign of entries.
        **kwargs: forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, volume_range, noise_range, baseline_range,
                 reverse_bool, invert_bool, **kwargs):
        super(Augment, self).__init__(**kwargs)
        # Stored as lists so the "disabled" sentinel comparisons in call()
        # (!= [1, 1] / != [0, 0]) behave the same for tuple arguments.
        self.volume_range = list(volume_range)
        self.noise_range = list(noise_range)
        self.baseline_range = list(baseline_range)
        self.reverse_bool = reverse_bool
        self.invert_bool = invert_bool

    def get_config(self):
        """Return the layer config including all augmentation settings."""
        config = super().get_config().copy()
        config.update({
            'volume_range': self.volume_range,
            'noise_range': self.noise_range,
            'baseline_range': self.baseline_range,
            'reverse_bool': self.reverse_bool,
            'invert_bool': self.invert_bool,
        })
        return config

    @staticmethod
    def _coin_flips(nbatch):
        """Return a 1-D int32 tensor of ``nbatch`` fair 0/1 draws."""
        flips = tf.random.categorical(tf.math.log([[0.5, 0.5]]),
                                      nbatch, dtype=tf.int32)
        # categorical() returns shape [1, nbatch].  Squeeze only the leading
        # axis: an unqualified squeeze would also drop the batch axis when
        # nbatch == 1, handing tf.reverse_sequence a scalar seq_lengths.
        return tf.squeeze(flips, axis=0)

    def call(self, inputs, training=None):
        """Apply the enabled augmentations; pass through when not training."""
        if not training:
            return inputs

        scale_on = self.volume_range != [1, 1]
        noise_on = self.noise_range != [0, 0]
        baseline_on = self.baseline_range != [0, 0]

        if scale_on or noise_on or baseline_on:
            # One random draw per (batch entry, channel), broadcast over axis 1.
            nbatch_1_nchannel = tf.stack(
                (tf.shape(inputs)[0], 1, tf.shape(inputs)[2]), axis=0)
            if scale_on:
                volume_ranges = tf.random.uniform(nbatch_1_nchannel,
                                                  *self.volume_range)
                inputs = tf.math.multiply(volume_ranges, inputs)
            if noise_on:
                noise_ranges = tf.random.uniform(nbatch_1_nchannel,
                                                 *self.noise_range)
                noises = tf.random.normal(tf.shape(inputs), 0, noise_ranges)
                inputs = tf.math.add(noises, inputs)
            if baseline_on:
                baseline_ranges = tf.random.uniform(nbatch_1_nchannel,
                                                    *self.baseline_range)
                inputs = tf.math.add(baseline_ranges, inputs)

        if self.reverse_bool:
            # A sequence length of 0 leaves the entry untouched; the full
            # axis-1 length reverses it completely.
            ireverse = self._coin_flips(tf.shape(inputs)[0])
            ireverse *= tf.shape(inputs)[1]
            inputs = tf.reverse_sequence(inputs, ireverse,
                                         seq_axis=1, batch_axis=0)

        if self.invert_bool:
            # Map the {0, 1} draws to {-1, +1} and shape them [nbatch, 1, 1]
            # so they broadcast over the remaining two axes.
            iinvert = self._coin_flips(tf.shape(inputs)[0])
            iinvert = tf.cast(iinvert, tf.float32) * 2 - 1
            iinvert = tf.expand_dims(tf.expand_dims(iinvert, axis=1), axis=1)
            inputs *= iinvert

        return inputs
263-
264213class Spectrogram (tf .keras .layers .Layer ):
265214 def __init__ (self , window_tics , stride_tics , ** kwargs ):
266215 super (Spectrogram , self ).__init__ (** kwargs )
@@ -452,20 +401,10 @@ def create_model(model_settings, model_parameters, io=sys.stdout):
452401 inputs = Input (shape = (ninput_tics , model_settings ['audio_nchannels' ]))
453402 hidden_layers .append (inputs )
454403
455- volume_range = [float (x ) for x in model_parameters ['augment_volume' ].split (',' )]
456- noise_range = [float (x ) for x in model_parameters ['augment_noise' ].split (',' )]
457- dc_range = [float (x ) for x in model_parameters ['augment_dc' ].split (',' )]
458- reverse_bool = model_parameters ['augment_reverse' ] == 'yes'
459- invert_bool = model_parameters ['augment_invert' ] == 'yes'
460- if volume_range != [1 ,1 ] or noise_range != [0 ,0 ] or dc_range != [0 ,0 ]:
461- x = Augment (volume_range , noise_range , dc_range , reverse_bool , invert_bool )(inputs )
462- else :
463- x = inputs
464-
465404 if representation == "waveform" :
466- x = Reshape ((ninput_tics ,1 ,model_settings ['audio_nchannels' ]))(x )
405+ x = Reshape ((ninput_tics ,1 ,model_settings ['audio_nchannels' ]))(inputs )
467406 elif representation == "spectrogram" :
468- x = Spectrogram (window_tics , stride_tics )(x )
407+ x = Spectrogram (window_tics , stride_tics )(inputs )
469408 if model_parameters ['range' ] != "" :
470409 lo , hi = model_parameters ['range' ].split ('-' )
471410 lo = float (lo ) * freq_scale
@@ -478,7 +417,7 @@ def create_model(model_settings, model_parameters, io=sys.stdout):
478417 elif representation == "mel-cepstrum" :
479418 filterbank_nchannels , dct_ncoefficients = model_parameters ['mel_dct' ].split (',' )
480419 x = MelCepstrum (window_tics , stride_tics , audio_tic_rate ,
481- int (filterbank_nchannels ), int (dct_ncoefficients ))(x )
420+ int (filterbank_nchannels ), int (dct_ncoefficients ))(inputs )
482421 hidden_layers .append (x )
483422 x_shape = x .shape
484423
0 commit comments