JaneliaSciComp
diff --git a/src/convolutional.py b/src/convolutional.py
Lines changed: 3 additions & 64 deletions
diff --git a/src/convolutional1.py b/src/convolutional1.py
Lines changed: 3 additions & 38 deletions
diff --git a/src/data.py b/src/data.py
Lines changed: 25 additions & 0 deletions
diff --git a/src/generalize b/src/generalize
Lines changed: 35 additions & 0 deletions
diff --git a/src/gui/controller.py b/src/gui/controller.py
Lines changed: 30 additions & 0 deletions
diff --git a/src/gui/main.py b/src/gui/main.py
Lines changed: 7 additions & 1 deletion
diff --git a/src/gui/model.py b/src/gui/model.py
Lines changed: 10 additions & 0 deletions
@@ -208,59 +208,8 @@ def model_parameters(time_units, freq_units, time_scale, freq_scale):
                                                                                                          ["max",
                                                                                                           "average"]],      None,                                                          True],
         ["denselayers",     "dense layers",            '',                   '',                     1, [],                 None,                                                          False],
-        ["augment_volume",  "augment volume",          '',                   '1,1',                  1, [],                 None,                                                          True],
-        ["augment_noise",   "augment noise",           '',                   '0,0',                  1, [],                 None,                                                          True],
-        ["augment_dc",      "augment DC",              '',                   '0,0',                  1, [],                 None,                                                          True],
-        ["augment_reverse", "augment reverse",         ["yes", "no"],        'no',                   1, [],                 None,                                                          True],
-        ["augment_invert",  "augment invert",          ["yes", "no"],        'no',                   1, [],                 None,                                                          True],
     ]
 
-class Augment(tf.keras.layers.Layer):
-    def __init__(self, volume_range, noise_range, baseline_range, reverse_bool, invert_bool, **kwargs):
-        super(Augment, self).__init__(**kwargs)
-        self.volume_range = volume_range
-        self.noise_range = noise_range
-        self.baseline_range = baseline_range
-        self.reverse_bool = reverse_bool
-        self.invert_bool = invert_bool
-    def get_config(self):
-        config = super().get_config().copy()
-        config.update({
-            'volume_range': self.volume_range,
-            'noise_range': self.noise_range,
-            'baseline_range': self.baseline_range,
-            'reverse_bool': self.reverse_bool,
-            'invert_bool': self.invert_bool,
-        })
-        return config
-    def call(self, inputs, training=None):
-        if not training:
-            return inputs
-        if self.volume_range != [1,1] or self.noise_range != [0,0] or self.baseline_range != [0,0]:
-            nbatch_1_nchannel = tf.stack((tf.shape(inputs)[0], 1, tf.shape(inputs)[2]), axis=0)
-        if self.volume_range != [1,1]:
-            volume_ranges = tf.random.uniform(nbatch_1_nchannel, *self.volume_range)
-            inputs = tf.math.multiply(volume_ranges, inputs)
-        if self.noise_range != [0,0]:
-            noise_ranges = tf.random.uniform(nbatch_1_nchannel, *self.noise_range)
-            noises = tf.random.normal(tf.shape(inputs), 0, noise_ranges)
-            inputs = tf.math.add(noises, inputs)
-        if self.baseline_range != [0,0]:
-            baseline_ranges = tf.random.uniform(nbatch_1_nchannel, *self.baseline_range)
-            inputs = tf.math.add(baseline_ranges, inputs)
-        if self.reverse_bool:
-            ireverse = tf.squeeze(tf.random.categorical(tf.math.log([[0.5, 0.5]]),
-                                                        tf.shape(inputs)[0], dtype=tf.int32))
-            ireverse *= tf.shape(inputs)[1]
-            inputs = tf.reverse_sequence(inputs, ireverse, seq_axis=1, batch_axis=0)
-        if self.invert_bool:
-            iinvert = tf.squeeze(tf.random.categorical(tf.math.log([[0.5, 0.5]]),
-                                                       tf.shape(inputs)[0], dtype=tf.int32))
-            iinvert = tf.cast(iinvert, tf.float32)*2-1
-            iinvert = tf.expand_dims(tf.expand_dims(iinvert, axis=1), axis=1)
-            inputs *= iinvert
-        return inputs
-
 class Spectrogram(tf.keras.layers.Layer):
     def __init__(self, window_tics, stride_tics, **kwargs):
         super(Spectrogram, self).__init__(**kwargs)
@@ -452,20 +401,10 @@ def create_model(model_settings, model_parameters, io=sys.stdout):
   inputs = Input(shape=(ninput_tics, model_settings['audio_nchannels']))
   hidden_layers.append(inputs)
 
-  volume_range = [float(x) for x in model_parameters['augment_volume'].split(',')]
-  noise_range = [float(x) for x in model_parameters['augment_noise'].split(',')]
-  dc_range = [float(x) for x in model_parameters['augment_dc'].split(',')]
-  reverse_bool = model_parameters['augment_reverse'] == 'yes'
-  invert_bool = model_parameters['augment_invert'] == 'yes'
-  if volume_range != [1,1] or noise_range != [0,0] or dc_range != [0,0]:
-    x = Augment(volume_range, noise_range, dc_range, reverse_bool, invert_bool)(inputs)
-  else:
-    x = inputs
-
   if representation == "waveform":
-    x = Reshape((ninput_tics,1,model_settings['audio_nchannels']))(x)
+    x = Reshape((ninput_tics,1,model_settings['audio_nchannels']))(inputs)
   elif representation == "spectrogram":
-    x = Spectrogram(window_tics, stride_tics)(x)
+    x = Spectrogram(window_tics, stride_tics)(inputs)
     if model_parameters['range'] != "":
       lo, hi = model_parameters['range'].split('-')
       lo = float(lo) * freq_scale
@@ -478,7 +417,7 @@ def create_model(model_settings, model_parameters, io=sys.stdout):
   elif representation == "mel-cepstrum":
     filterbank_nchannels, dct_ncoefficients = model_parameters['mel_dct'].split(',')
     x = MelCepstrum(window_tics, stride_tics, audio_tic_rate,
-                         int(filterbank_nchannels), int(dct_ncoefficients))(x)
+                         int(filterbank_nchannels), int(dct_ncoefficients))(inputs)
     hidden_layers.append(x)
   x_shape = x.shape
 
 
@@ -216,36 +216,8 @@ def model_parameters(time_units, freq_units, time_scale, freq_scale):
                                                                                                           ["max",
                                                                                                            "average"]],      None,                                                          True],
         ["denselayers",     "dense layers",             '',                   '',                     1, [],                 None,                                                          False],
-        ["augment_volume",  "augment volume",           '',                   '1,1',                  1, [],                 None,                                                          True],
-        ["augment_noise",   "augment noise",            '',                   '0,0',                  1, [],                 None,                                                          True],
     ]
 
-class Augment(tf.keras.layers.Layer):
-    def __init__(self, volume_range, noise_range, **kwargs):
-        super(Augment, self).__init__(**kwargs)
-        self.volume_range = volume_range
-        self.noise_range = noise_range
-    def get_config(self):
-        config = super().get_config().copy()
-        config.update({
-            'volume_range': self.volume_range,
-            'noise_range': self.noise_range,
-        })
-        return config
-    def call(self, inputs, training=None):
-        if not training:
-            return inputs
-        if self.volume_range != [1,1] or self.noise_range != [0,0]:
-            nbatch_1_nchannel = tf.stack((tf.shape(inputs)[0], 1, tf.shape(inputs)[2]), axis=0)
-        if self.volume_range != [1,1]:
-            volume_ranges = tf.random.uniform(nbatch_1_nchannel, *self.volume_range)
-            inputs = tf.math.multiply(volume_ranges, inputs)
-        if self.noise_range != [0,0]:
-            noise_ranges = tf.random.uniform(nbatch_1_nchannel, *self.noise_range)
-            noises = tf.random.normal(tf.shape(inputs), 0, noise_ranges)
-            inputs = tf.math.add(noises, inputs)
-        return inputs
-
 class Spectrogram(tf.keras.layers.Layer):
     def __init__(self, window_tics, stride_tics, **kwargs):
         super(Spectrogram, self).__init__(**kwargs)
@@ -444,17 +416,10 @@ def Identity(x): return lambda x: x
   inputs = Input(shape=(ninput_tics, model_settings['audio_nchannels']))
   hidden_layers.append(inputs)
 
-  volume_range = [float(x) for x in model_parameters['augment_volume'].split(',')]
-  noise_range = [float(x) for x in model_parameters['augment_noise'].split(',')]
-  if volume_range != [1,1] or noise_range != [0,0]:
-    x = Augment(volume_range, noise_range)(inputs)
-  else:
-    x = inputs
-
   if representation == "waveform":
-    x = Reshape((ninput_tics,1,model_settings['audio_nchannels']))(x)
+    x = Reshape((ninput_tics,1,model_settings['audio_nchannels']))(inputs)
   elif representation == "spectrogram":
-    x = Spectrogram(window_tics, stride_tics)(x)
+    x = Spectrogram(window_tics, stride_tics)(inputs)
     if model_parameters['range'] != "":
       lo, hi = model_parameters['range'].split('-')
       lo = float(lo) * freq_scale
@@ -467,7 +432,7 @@ def Identity(x): return lambda x: x
   elif representation == "mel-cepstrum":
     filterbank_nchannels, dct_ncoefficients = model_parameters['mel_dct'].split(',')
     x = MelCepstrum(window_tics, stride_tics, audio_tic_rate,
-                         int(filterbank_nchannels), int(dct_ncoefficients))(x)
+                         int(filterbank_nchannels), int(dct_ncoefficients))(inputs)
     hidden_layers.append(x)
   x_shape = x.shape
 
 
@@ -494,6 +494,31 @@ def _get_data(self, q, o, how_many, offset, model_settings, loss, overlapped_pre
                             root = overlapped_sound['label'].removeprefix(overlapped_prefix)
                             labels[i - offset, self.labels_list.index(root)] = target
                             sounds[-1].append({k: v for k,v in overlapped_sound.items() if k!='overlaps'})
+
+            # augmentation
+            if use_audio and mode=='training':
+                volume_range = [float(x) for x in model_settings['augment_volume'].split(',')]
+                noise_range = [float(x) for x in model_settings['augment_noise'].split(',')]
+                dc_range = [float(x) for x in model_settings['augment_dc'].split(',')]
+                reverse_bool = model_settings['augment_reverse'] == 'yes'
+                invert_bool = model_settings['augment_invert'] == 'yes'
+                if volume_range != [1,1]:
+                    volume_ranges = np.random.uniform(*volume_range, (nsounds,1,audio_nchannels))
+                    audio_slice *= volume_ranges
+                if noise_range != [0,0]:
+                    noise_ranges = np.random.uniform(*noise_range, (nsounds,1,audio_nchannels))
+                    noises = np.random.normal(0, noise_ranges, audio_slice.shape)
+                    audio_slice += noises
+                if dc_range != [0,0]:
+                    dc_ranges = np.random.uniform(*dc_range, (nsounds,1,audio_nchannels))
+                    audio_slice += dc_ranges
+                if reverse_bool:
+                    ireverse = np.random.choice([False,True], nsounds)
+                    audio_slice[ireverse] = np.flip(audio_slice[ireverse], axis=1)
+                if invert_bool:
+                    iinvert = np.random.choice([-1,1], (nsounds,1,1))
+                    audio_slice *= iinvert
+
             if use_audio and use_video:
                 q.put([[audio_slice, video_slice], labels, sounds])
             elif use_audio:
 
@@ -40,6 +40,11 @@
 #     --video_channels=0 \
 #     --batch_seed=_1 \
 #     --weights_seed=_1 \
+#     --augment_volume=1,1 \
+#     --augment_noise=0,0 \
+#     --augment_dc=0,0 \
+#     --augment_reverse=no \
+#     --augment_invert=no \
 #     --deterministic=0 \
 #     --igpu=0 \
 #     --ioffset=3 \
@@ -133,6 +138,11 @@ def main():
             "--video_channels="+FLAGS.video_channels,
             "--random_seed_batch="+str(FLAGS.batch_seed),
             "--random_seed_weights="+str(FLAGS.weights_seed),
+            "--augment_volume="+str(FLAGS.augment_volume),
+            "--augment_noise="+str(FLAGS.augment_noise),
+            "--augment_dc="+str(FLAGS.augment_dc),
+            "--augment_reverse="+str(FLAGS.augment_reverse),
+            "--augment_invert="+str(FLAGS.augment_invert),
             "--deterministic="+FLAGS.deterministic,
             "--train_dir="+os.path.join(FLAGS.logdir,"generalize_"+model),
             "--summaries_dir="+os.path.join(FLAGS.logdir,"summaries_"+model),
@@ -294,6 +304,31 @@ if __name__ == '__main__':
       type=int,
       default=59185,
       help='Randomize weight initialization if -1; otherwise use supplied number as seed.')
+  parser.add_argument(
+      '--augment_volume',
+      type=str,
+      default='1,1',
+      help='Multiply each annotation by a uniform random number in this interval when training')
+  parser.add_argument(
+      '--augment_noise',
+      type=str,
+      default='0,0',
+      help='Add noise to each annotation with a uniform random std dev in this interval when training')
+  parser.add_argument(
+      '--augment_dc',
+      type=str,
+      default='0,0',
+      help='Add to each annotation a uniform random number in this interval when training')
+  parser.add_argument(
+      '--augment_reverse',
+      type=str,
+      default='no',
+      help='Flip in time with a probability of half each annotation when training')
+  parser.add_argument(
+      '--augment_invert',
+      type=str,
+      default='no',
+      help='Negate with a probability of half each annotation when training')
   parser.add_argument(
       '--model_architecture',
       type=str,
 
@@ -1222,6 +1222,11 @@ async def train_actuate():
                 "--video_channels="+str(M.video_channels), \
                 "--batch_seed="+V.batch_seed.value, \
                 "--weights_seed="+V.weights_seed.value, \
+                "--augment_volume="+V.augment_volume.value, \
+                "--augment_noise="+V.augment_noise.value, \
+                "--augment_dc="+V.augment_dc.value, \
+                "--augment_reverse="+V.augment_reverse.value, \
+                "--augment_invert="+V.augment_invert.value, \
                 "--deterministic="+M.deterministic, \
                 "--igpu=QUEUE1", \
                 "--ireplicates="+','.join([str(x) for x in range(ireplicate, min(1+nreplicates, \
@@ -1313,6 +1318,11 @@ async def leaveout_actuate(comma):
                 "--video_channels="+str(M.video_channels), \
                 "--batch_seed="+V.batch_seed.value, \
                 "--weights_seed="+V.weights_seed.value, \
+                "--augment_volume="+V.augment_volume.value, \
+                "--augment_noise="+V.augment_noise.value, \
+                "--augment_dc="+V.augment_dc.value, \
+                "--augment_reverse="+V.augment_reverse.value, \
+                "--augment_invert="+V.augment_invert.value, \
                 "--deterministic="+M.deterministic, \
                 "--ioffset="+str(ivalidation_file),
                 "--igpu=QUEUE1", \
@@ -1384,6 +1394,11 @@ async def xvalidate_actuate():
                 "--video_channels="+str(M.video_channels), \
                 "--batch_seed="+V.batch_seed.value, \
                 "--weights_seed="+V.weights_seed.value, \
+                "--augment_volume="+V.augment_volume.value, \
+                "--augment_noise="+V.augment_noise.value, \
+                "--augment_dc="+V.augment_dc.value, \
+                "--augment_reverse="+V.augment_reverse.value, \
+                "--augment_invert="+V.augment_invert.value, \
                 "--deterministic="+M.deterministic, \
                 "--igpu=QUEUE1", \
                 "--kfold="+V.kfold.value, \
@@ -2218,6 +2233,21 @@ def _copy_callback():
             elif "random_seed_weights = " in line:
                 m=re.search('random_seed_weights = (.*)', line)
                 V.weights_seed.value = m.group(1)
+            elif "augment_volume = " in line:
+                m=re.search('augment_volume = (.*)', line)
+                V.augment_volume.value = m.group(1)
+            elif "augment_noise = " in line:
+                m=re.search('augment_noise = (.*)', line)
+                V.augment_noise.value = m.group(1)
+            elif "augment_dc = " in line:
+                m=re.search('augment_dc = (.*)', line)
+                V.augment_dc.value = m.group(1)
+            elif "augment_reverse = " in line:
+                m=re.search('augment_reverse = (.*)', line)
+                V.augment_reverse.value = m.group(1)
+            elif "augment_invert = " in line:
+                m=re.search('augment_invert = (.*)', line)
+                V.augment_invert.value = m.group(1)
             elif "validate_step_period = " in line:
                 m=re.search('validate_step_period = (\d+)', line)
                 V.save_and_validate_period.value = m.group(1)
 
@@ -177,7 +177,13 @@
                                   width=105)),
                        row(column(V.file_dialog_string,
                                   V.file_dialog_table),
-                           column(*[row([model_parameters[x] for x in p])
+                           column(row(V.augment_volume,
+                                      V.augment_noise,
+                                      V.augment_dc,
+                                      V.augment_reverse,
+                                      V.augment_invert,
+                                      width=M.gui_width_pix//2),
+                                  *[row([model_parameters[x] for x in p])
                                     for p in V.model_parameters_partitioned],
                                   V.model_summary,
                                   width=M.gui_width_pix//2))),
 
@@ -57,6 +57,11 @@ def save_state_callback():
                      'nreplicates': V.nreplicates.value,
                      'batch_seed': V.batch_seed.value,
                      'weights_seed': V.weights_seed.value,
+                     'augment_volume': V.augment_volume.value,
+                     'augment_noise': V.augment_noise.value,
+                     'augment_dc': V.augment_dc.value,
+                     'augment_reverse': V.augment_reverse.value,
+                     'augment_invert': V.augment_invert.value,
                      'labels': str.join(',',[x.value for x in V.label_texts]),
                      'file_dialog_string': V.file_dialog_string.value,
                      'context': V.context.value,
@@ -510,6 +515,11 @@ def is_local_server_or_cluster(varname, varvalue):
                           'nreplicates':'1', \
                           'batch_seed':'-1', \
                           'weights_seed':'-1', \
+                          'augment_volume':'1,1', \
+                          'augment_noise':'0,0', \
+                          'augment_dc':'0,0', \
+                          'augment_reverse':'no', \
+                          'augment_invert':'no', \
                           'labels':','*(nlabels-1), \
                           'file_dialog_string':os.getcwd(), \
                           'context':str(0.2048 / time_scale), \