autoencoder

bjarthur · bjarthur · commit 39ba737b8308 · 2024-11-06T10:55:18.000-05:00
diff --git a/src/accuracy b/src/accuracy
@@ -231,7 +231,7 @@ def main():
           validation_time, validation_step, \
           _, _, _, _, \
           labels_touse, label_counts, _, _, batch_size, _ = \
-          read_logs(FLAGS.logdir)
+          read_logs(FLAGS.logdir, FLAGS.loss)
     training_set_size = {k: len(label_counts[k]["training"]) * \
                             np.max(list(label_counts[k]["training"].values())) \
                          for k in label_counts.keys()}
@@ -316,6 +316,8 @@ def main():
     plt.savefig(os.path.join(FLAGS.logdir,'train-validation-loss.pdf'))
     plt.close()
   
+    if FLAGS.loss=='autoencoder':
+        return
 
     def PvR(ax, precision, recall, validation_step, minp, minr):
         minp = min(minp, min(precision))
@@ -781,8 +783,8 @@ if __name__ == "__main__":
       '--loss',
       type=str,
       default='exclusive',
-      choices=['exclusive', 'overlapped'],
-      help='Sigmoid cross entropy is used for "overlapped" labels while softmax cross entropy is used for "exclusive" labels.')
+      choices=['exclusive', 'overlapped', 'autoencoder'],
+      help='Sigmoid cross entropy is used for "overlapped" or "autoencoder" labels while softmax cross entropy is used for "exclusive" labels.')
   parser.add_argument(
       '--overlapped_prefix',
       type=str,
diff --git a/src/autoencoder.py b/src/autoencoder.py
diff --git a/src/classify b/src/classify
@@ -91,16 +91,19 @@ def main():
   with open(FLAGS.model_labels, 'r') as fid:
     model_labels = fid.read().splitlines()
 
-  if FLAGS.labels:
-    labels = np.array(FLAGS.labels.split(','))
-    iimodel_labels = np.argsort(np.argsort(model_labels))
-    ilabels = np.argsort(labels)
-    labels = labels[ilabels][iimodel_labels]
-    assert np.all(labels==model_labels)
+  if FLAGS.loss != 'autoencoder':
+      if FLAGS.labels:
+        labels = np.array(FLAGS.labels.split(','))
+        iimodel_labels = np.argsort(np.argsort(model_labels))
+        ilabels = np.argsort(labels)
+        labels = labels[ilabels][iimodel_labels]
+        assert np.all(labels==model_labels)
+      else:
+        labels = model_labels
+        ilabels = iimodel_labels = range(len(labels))
+      print('labels: '+str(labels))
   else:
-    labels = model_labels
-    ilabels = iimodel_labels = range(len(labels))
-  print('labels: '+str(labels))
+      labels = ilabels = None
 
   if FLAGS.prevalences and FLAGS.loss=='exclusive':
     prevalences = np.array([float(x) for x in FLAGS.prevalences.split(',')])
@@ -184,11 +187,17 @@ def main():
 
   context_samples = int(FLAGS.context * FLAGS.time_scale * data_sample_rate)
   stride_x_downsample_samples = (clip_window_samples - context_samples) // (FLAGS.parallelize-1)
-  clip_stride_samples = stride_x_downsample_samples * FLAGS.parallelize
+  if FLAGS.loss=='autoencoder':
+      clip_stride_samples = clip_window_samples
+  else:
+      clip_stride_samples = stride_x_downsample_samples * FLAGS.parallelize
 
   stride_x_downsample_sec = stride_x_downsample_samples / data_sample_rate
   npadding = round((FLAGS.context / 2 + FLAGS.shiftby) * FLAGS.time_scale / stride_x_downsample_sec)
-  probability_list = [np.zeros((npadding, len(labels)), dtype=np.float32)]
+  if FLAGS.loss == 'autoencoder':
+      probability_list = [np.zeros((npadding, ), dtype=np.float32)]
+  else:
+      probability_list = [np.zeros((npadding, len(labels)), dtype=np.float32)]
 
   # Inference along audio stream.
   for data_offset_samples in range(0, 1+data_len_samples, clip_stride_samples):
@@ -220,31 +229,42 @@ def main():
       inputs = tf.expand_dims(video_slice, 0)
     _,outputs = recognize_graph(inputs)
 
-    current_time_sec = np.round(data_offset_samples / data_sample_rate).astype(int)
     if pad_len>0:
       discard_len = np.ceil(pad_len/stride_x_downsample_samples).astype(int)
-      probability_list.append(np.array(outputs.numpy()[0,:-discard_len,:]))
+      if FLAGS.loss == 'autoencoder':
+          probability_list.append(np.array(outputs.numpy()[0,:-discard_len,0]))
+      else:
+          probability_list.append(np.array(outputs.numpy()[0,:-discard_len,:]))
       break
     else:
-      probability_list.append(np.array(outputs.numpy()[0,:,:]))
+      if FLAGS.loss == 'autoencoder':
+          probability_list.append(np.array(outputs.numpy()[0,:,0]))
+      else:
+          probability_list.append(np.array(outputs.numpy()[0,:,:]))
 
   sample_rate = round(1/stride_x_downsample_sec)
   if sample_rate != 1/stride_x_downsample_sec:
     print('WARNING: .wav files do not support fractional sampling rates!')
 
   probability_matrix = np.concatenate(probability_list)
-  if prevalences:
-      denominator = np.sum(probability_matrix * prevalences, axis=1)
-  for ch in range(len(labels)):
-    if prevalences:
-      adjusted_probability = probability_matrix[:,ch] * prevalences[ch]
-      adjusted_probability[npadding:] /= denominator[npadding:]
-    else:
-      adjusted_probability = probability_matrix[:,ch]
-    waveform = adjusted_probability*np.iinfo(np.int16).max
-    withoutext = trim_ext(FLAGS.wav)
-    filename = withoutext+'-'+labels[ch]+'.wav'
-    wavfile.write(filename, int(sample_rate), waveform.astype('int16'))
+  if FLAGS.loss != 'autoencoder':
+      if prevalences:
+          denominator = np.sum(probability_matrix * prevalences, axis=1)
+      for ch in range(len(labels)):
+        if prevalences:
+          adjusted_probability = probability_matrix[:,ch] * prevalences[ch]
+          adjusted_probability[npadding:] /= denominator[npadding:]
+        else:
+          adjusted_probability = probability_matrix[:,ch]
+        waveform = adjusted_probability*np.iinfo(np.int16).max
+        withoutext = trim_ext(FLAGS.wav)
+        filename = withoutext+'-'+labels[ch]+'.wav'
+        wavfile.write(filename, int(sample_rate), waveform.astype('int16'))
+  else:
+        waveform = probability_matrix * np.iinfo(np.int16).max
+        withoutext = trim_ext(FLAGS.wav)
+        filename = withoutext+'-.wav'
+        wavfile.write(filename, int(sample_rate), waveform.astype('int16'))
 
 if __name__ == '__main__':
   parser = argparse.ArgumentParser(description='test_streaming_accuracy')
@@ -271,8 +291,8 @@ if __name__ == '__main__':
       '--loss',
       type=str,
       default='exclusive',
-      choices=['exclusive', 'overlapped'],
-      help='Sigmoid cross entropy is used for "overlapped" labels while softmax cross entropy is used for "exclusive" labels.')
+      choices=['exclusive', 'overlapped', 'autoencoder'],
+      help='Sigmoid cross entropy is used for "overlapped" or "autoencoder" labels while softmax cross entropy is used for "exclusive" labels.')
   parser.add_argument(
       '--context',
       type=float,
diff --git a/src/data.py b/src/data.py
@@ -445,7 +445,7 @@ def _get_data(self, q, o, how_many, offset, model_settings, loss, overlapped_pre
                 bkg = {}
             if loss=='exclusive':
                 labels = np.zeros(nsounds, dtype=np.int32)
-            else:
+            elif loss=='overlapped':
                 labels = 2*np.ones((nsounds, len(self.labels_list)), dtype=np.float32)
             # repeatedly to generate the final output sound data we'll use in training.
             for i in range(offset, offset + nsounds):
@@ -481,7 +481,7 @@ def _get_data(self, q, o, how_many, offset, model_settings, loss, overlapped_pre
                 if loss=='exclusive':
                     labels[i - offset] = self.labels_list.index(sound['label'])
                     sounds.append({k: v for k,v in sound.items() if k!='overlaps'})
-                else:
+                elif loss=='overlapped':
                     target = 0 if sound['label'].startswith(overlapped_prefix) else 1
                     root = sound['label'].removeprefix(overlapped_prefix)
                     labels[i - offset, self.labels_list.index(root)] = target
@@ -519,6 +519,9 @@ def _get_data(self, q, o, how_many, offset, model_settings, loss, overlapped_pre
                     iinvert = np.random.choice([-1,1], (nsounds,1,1))
                     audio_slice *= iinvert
 
+            if loss=='autoencoder':
+                labels = audio_slice
+
             if use_audio and use_video:
                 q.put([[audio_slice, video_slice], labels, sounds])
             elif use_audio:
diff --git a/src/freeze b/src/freeze
@@ -120,7 +120,7 @@ def create_inference_graph():
           hidden, output = self.thismodel(waveform, training=False)
           if FLAGS.loss=='exclusive':
               output = tf.nn.softmax(output)
-          else:
+          elif FLAGS.loss=='overlapped':
               output = tf.math.sigmoid(output)
           return hidden, output
 
@@ -231,8 +231,8 @@ if __name__ == '__main__':
       '--loss',
       type=str,
       default='exclusive',
-      choices=['exclusive', 'overlapped'],
-      help='Sigmoid cross entropy is used for "overlapped" labels while softmax cross entropy is used for "exclusive" labels.')
+      choices=['exclusive', 'overlapped', 'autoencoder'],
+      help='Sigmoid cross entropy is used for "overlapped" or "autoencoder" labels while softmax cross entropy is used for "exclusive" labels.')
   parser.add_argument(
       '--labels_touse',
       type=str,
diff --git a/src/gui/controller.py b/src/gui/controller.py
@@ -1616,18 +1616,26 @@ def accuracy_succeeded(logdir, reftime):
     logfile = os.path.join(logdir, 'accuracy.log')
     if not logfile_succeeded(logfile, reftime):
         return False
-    traindirs = list(filter(lambda x: os.path.isdir(os.path.join(logdir,x)) and \
-                            not x.startswith('summaries_'), os.listdir(logdir)))
-    toplevelfiles = ["precision-recall.pdf",
-                     "confusion-matrix.pdf",
-                     "train-validation-loss.pdf",
-                     "P-R-F1-average.pdf",
-                     "P-R-F1-label.pdf",
-                     "P-R-F1-model.pdf",
-                     "PvR.pdf"]
+    loss = None  # logs lacking a "loss = " line fall through to the full (non-autoencoder) checks
+    with open(logfile) as fid:
+        for line in fid:
+            m = re.search('loss = (.+)', line)
+            loss = m.group(1) if m else loss  # keep the last value found; ignore non-matching lines
+    toplevelfiles = ["train-validation-loss.pdf"]
+    if loss != 'autoencoder':
+        toplevelfiles.extend(["precision-recall.pdf",
+                              "confusion-matrix.pdf",
+                              "P-R-F1-average.pdf",
+                              "P-R-F1-label.pdf",
+                              "P-R-F1-model.pdf",
+                              "PvR.pdf"])
     for toplevelfile in toplevelfiles:
         if not pdffile_succeeded(os.path.join(logdir, toplevelfile), reftime):
             return False
+    if loss == 'autoencoder':
+        return True
+    traindirs = list(filter(lambda x: os.path.isdir(os.path.join(logdir,x)) and \
+                            not x.startswith('summaries_'), os.listdir(logdir)))
     one_fold_has_thresholds = False
     for traindir in traindirs:
         trainfiles = os.listdir(os.path.join(logdir,traindir))
diff --git a/src/gui/view.py b/src/gui/view.py
@@ -1219,7 +1219,7 @@ def recordings_update():
                 wavfiles.append(wavfile)
         for wavfile in wavfiles:
             M.used_sounds.append({'file': list(os.path.split(wavfile)),
-                                  'ticks': [1, 1], 'kind': '', 'label': ' '})
+                                  'ticks': [1, 1], 'kind': '', 'label': ''})
     elif M.dfs:
         wavfiles = set()
         kinds = kinds_touse.value.split(',')
@@ -2173,7 +2173,7 @@ def init(_bokeh_document):
 
     loss = Select(title="loss", height=50, \
                   value=M.state['loss'], \
-                  options=["exclusive", "overlapped"])
+                  options=["exclusive", "overlapped", "autoencoder"])
     loss.on_change('value', lambda a,o,n: C.generic_parameters_callback(''))
 
     learning_rate = TextInput(value=M.state['learning_rate'], \
diff --git a/src/lib.py b/src/lib.py
@@ -335,7 +335,7 @@ def layout(nplots):
   return nrows, ncols
 
 
-def read_log(frompath, logfile):
+def read_log(frompath, logfile, loss='exclusive'):
   train_accuracy=[]; train_loss=[]; train_time=[]; train_step=[]
   validation_time=[]; validation_step=[]
   validation_precision=[]; validation_recall=[]
@@ -410,11 +410,14 @@ def read_log(frompath, logfile):
               conf_matrix_state=False
               confusion_string=""
       elif "Validation\n" in line:
-        validation_precision.append(precision)
-        validation_recall.append(recall)
-        validation_precision_mean.append(precision_mean)
-        validation_recall_mean.append(recall_mean)
-        m=re.search('^([0-9.]+),([0-9]+),[0-9.]+ Validation$',line)
+        m=re.search('^([0-9.]+),([0-9]+),([0-9.]+) Validation$',line)  # quantifier inside group 3 so the whole third field is captured, not just its last character
+        if loss != 'autoencoder':
+            validation_precision.append(precision)
+            validation_recall.append(recall)
+            validation_precision_mean.append(precision_mean)
+            validation_recall_mean.append(recall_mean)
+        else:
+            validation_recall_mean.append(float(m.group(3)))
         validation_time_value = float(m.group(1))
         if len(validation_time)>0 and \
                 (validation_time_value+validation_restart_correction)<validation_time[-1]:
@@ -449,7 +452,7 @@ def read_log(frompath, logfile):
          #test_accuracy, \
 
 
-def read_logs(frompath):
+def read_logs(frompath, loss='exclusive'):
   train_accuracy={}; train_loss={}; train_time={}; train_step={}
   validation_precision={}; validation_recall={}
   validation_precision_mean={}; validation_recall_mean={}
@@ -474,7 +477,7 @@ def read_logs(frompath):
           labels_touse[model], label_counts[model], \
           nparameters_total[model], nparameters_finallayer[model], \
           batch_size[model], nlayers[model] = \
-          read_log(frompath, logfile)
+          read_log(frompath, logfile, loss)
           #test_accuracy[model], \
 
   return train_accuracy, train_loss, train_time, train_step, \
diff --git a/src/loop b/src/loop
diff --git a/src/train b/src/train