Skip to content

Commit 39ba737

Browse files
committed
autoencoder
1 parent caa6164 commit 39ba737

10 files changed

Lines changed: 692 additions & 86 deletions

File tree

src/accuracy

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ def main():
231231
validation_time, validation_step, \
232232
_, _, _, _, \
233233
labels_touse, label_counts, _, _, batch_size, _ = \
234-
read_logs(FLAGS.logdir)
234+
read_logs(FLAGS.logdir, FLAGS.loss)
235235
training_set_size = {k: len(label_counts[k]["training"]) * \
236236
np.max(list(label_counts[k]["training"].values())) \
237237
for k in label_counts.keys()}
@@ -316,6 +316,8 @@ def main():
316316
plt.savefig(os.path.join(FLAGS.logdir,'train-validation-loss.pdf'))
317317
plt.close()
318318

319+
if FLAGS.loss=='autoencoder':
320+
return
319321

320322
def PvR(ax, precision, recall, validation_step, minp, minr):
321323
minp = min(minp, min(precision))
@@ -781,8 +783,8 @@ if __name__ == "__main__":
781783
'--loss',
782784
type=str,
783785
default='exclusive',
784-
choices=['exclusive', 'overlapped'],
785-
help='Sigmoid cross entropy is used for "overlapped" labels while softmax cross entropy is used for "exclusive" labels.')
786+
choices=['exclusive', 'overlapped', 'autoencoder'],
787+
help='Sigmoid cross entropy is used for "overlapped" or "autoencoder" labels while softmax cross entropy is used for "exclusive" labels.')
786788
parser.add_argument(
787789
'--overlapped_prefix',
788790
type=str,

src/autoencoder.py

Lines changed: 546 additions & 0 deletions
Large diffs are not rendered by default.

src/classify

Lines changed: 48 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -91,16 +91,19 @@ def main():
9191
with open(FLAGS.model_labels, 'r') as fid:
9292
model_labels = fid.read().splitlines()
9393

94-
if FLAGS.labels:
95-
labels = np.array(FLAGS.labels.split(','))
96-
iimodel_labels = np.argsort(np.argsort(model_labels))
97-
ilabels = np.argsort(labels)
98-
labels = labels[ilabels][iimodel_labels]
99-
assert np.all(labels==model_labels)
94+
if FLAGS.loss != 'autoencoder':
95+
if FLAGS.labels:
96+
labels = np.array(FLAGS.labels.split(','))
97+
iimodel_labels = np.argsort(np.argsort(model_labels))
98+
ilabels = np.argsort(labels)
99+
labels = labels[ilabels][iimodel_labels]
100+
assert np.all(labels==model_labels)
101+
else:
102+
labels = model_labels
103+
ilabels = iimodel_labels = range(len(labels))
104+
print('labels: '+str(labels))
100105
else:
101-
labels = model_labels
102-
ilabels = iimodel_labels = range(len(labels))
103-
print('labels: '+str(labels))
106+
labels = ilabels = None
104107

105108
if FLAGS.prevalences and FLAGS.loss=='exclusive':
106109
prevalences = np.array([float(x) for x in FLAGS.prevalences.split(',')])
@@ -184,11 +187,17 @@ def main():
184187

185188
context_samples = int(FLAGS.context * FLAGS.time_scale * data_sample_rate)
186189
stride_x_downsample_samples = (clip_window_samples - context_samples) // (FLAGS.parallelize-1)
187-
clip_stride_samples = stride_x_downsample_samples * FLAGS.parallelize
190+
if FLAGS.loss=='autoencoder':
191+
clip_stride_samples = clip_window_samples
192+
else:
193+
clip_stride_samples = stride_x_downsample_samples * FLAGS.parallelize
188194

189195
stride_x_downsample_sec = stride_x_downsample_samples / data_sample_rate
190196
npadding = round((FLAGS.context / 2 + FLAGS.shiftby) * FLAGS.time_scale / stride_x_downsample_sec)
191-
probability_list = [np.zeros((npadding, len(labels)), dtype=np.float32)]
197+
if FLAGS.loss == 'autoencoder':
198+
probability_list = [np.zeros((npadding, ), dtype=np.float32)]
199+
else:
200+
probability_list = [np.zeros((npadding, len(labels)), dtype=np.float32)]
192201

193202
# Inference along audio stream.
194203
for data_offset_samples in range(0, 1+data_len_samples, clip_stride_samples):
@@ -220,31 +229,42 @@ def main():
220229
inputs = tf.expand_dims(video_slice, 0)
221230
_,outputs = recognize_graph(inputs)
222231

223-
current_time_sec = np.round(data_offset_samples / data_sample_rate).astype(int)
224232
if pad_len>0:
225233
discard_len = np.ceil(pad_len/stride_x_downsample_samples).astype(int)
226-
probability_list.append(np.array(outputs.numpy()[0,:-discard_len,:]))
234+
if FLAGS.loss == 'autoencoder':
235+
probability_list.append(np.array(outputs.numpy()[0,:-discard_len,0]))
236+
else:
237+
probability_list.append(np.array(outputs.numpy()[0,:-discard_len,:]))
227238
break
228239
else:
229-
probability_list.append(np.array(outputs.numpy()[0,:,:]))
240+
if FLAGS.loss == 'autoencoder':
241+
probability_list.append(np.array(outputs.numpy()[0,:,0]))
242+
else:
243+
probability_list.append(np.array(outputs.numpy()[0,:,:]))
230244

231245
sample_rate = round(1/stride_x_downsample_sec)
232246
if sample_rate != 1/stride_x_downsample_sec:
233247
print('WARNING: .wav files do not support fractional sampling rates!')
234248

235249
probability_matrix = np.concatenate(probability_list)
236-
if prevalences:
237-
denominator = np.sum(probability_matrix * prevalences, axis=1)
238-
for ch in range(len(labels)):
239-
if prevalences:
240-
adjusted_probability = probability_matrix[:,ch] * prevalences[ch]
241-
adjusted_probability[npadding:] /= denominator[npadding:]
242-
else:
243-
adjusted_probability = probability_matrix[:,ch]
244-
waveform = adjusted_probability*np.iinfo(np.int16).max
245-
withoutext = trim_ext(FLAGS.wav)
246-
filename = withoutext+'-'+labels[ch]+'.wav'
247-
wavfile.write(filename, int(sample_rate), waveform.astype('int16'))
250+
if FLAGS.loss != 'autoencoder':
251+
if prevalences:
252+
denominator = np.sum(probability_matrix * prevalences, axis=1)
253+
for ch in range(len(labels)):
254+
if prevalences:
255+
adjusted_probability = probability_matrix[:,ch] * prevalences[ch]
256+
adjusted_probability[npadding:] /= denominator[npadding:]
257+
else:
258+
adjusted_probability = probability_matrix[:,ch]
259+
waveform = adjusted_probability*np.iinfo(np.int16).max
260+
withoutext = trim_ext(FLAGS.wav)
261+
filename = withoutext+'-'+labels[ch]+'.wav'
262+
wavfile.write(filename, int(sample_rate), waveform.astype('int16'))
263+
else:
264+
waveform = probability_matrix * np.iinfo(np.int16).max
265+
withoutext = trim_ext(FLAGS.wav)
266+
filename = withoutext+'-.wav'
267+
wavfile.write(filename, int(sample_rate), waveform.astype('int16'))
248268

249269
if __name__ == '__main__':
250270
parser = argparse.ArgumentParser(description='test_streaming_accuracy')
@@ -271,8 +291,8 @@ if __name__ == '__main__':
271291
'--loss',
272292
type=str,
273293
default='exclusive',
274-
choices=['exclusive', 'overlapped'],
275-
help='Sigmoid cross entropy is used for "overlapped" labels while softmax cross entropy is used for "exclusive" labels.')
294+
choices=['exclusive', 'overlapped', 'autoencoder'],
295+
help='Sigmoid cross entropy is used for "overlapped" or "autoencoder" labels while softmax cross entropy is used for "exclusive" labels.')
276296
parser.add_argument(
277297
'--context',
278298
type=float,

src/data.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -445,7 +445,7 @@ def _get_data(self, q, o, how_many, offset, model_settings, loss, overlapped_pre
445445
bkg = {}
446446
if loss=='exclusive':
447447
labels = np.zeros(nsounds, dtype=np.int32)
448-
else:
448+
elif loss=='overlapped':
449449
labels = 2*np.ones((nsounds, len(self.labels_list)), dtype=np.float32)
450450
# repeatedly to generate the final output sound data we'll use in training.
451451
for i in range(offset, offset + nsounds):
@@ -481,7 +481,7 @@ def _get_data(self, q, o, how_many, offset, model_settings, loss, overlapped_pre
481481
if loss=='exclusive':
482482
labels[i - offset] = self.labels_list.index(sound['label'])
483483
sounds.append({k: v for k,v in sound.items() if k!='overlaps'})
484-
else:
484+
elif loss=='overlapped':
485485
target = 0 if sound['label'].startswith(overlapped_prefix) else 1
486486
root = sound['label'].removeprefix(overlapped_prefix)
487487
labels[i - offset, self.labels_list.index(root)] = target
@@ -519,6 +519,9 @@ def _get_data(self, q, o, how_many, offset, model_settings, loss, overlapped_pre
519519
iinvert = np.random.choice([-1,1], (nsounds,1,1))
520520
audio_slice *= iinvert
521521

522+
if loss=='autoencoder':
523+
labels = audio_slice
524+
522525
if use_audio and use_video:
523526
q.put([[audio_slice, video_slice], labels, sounds])
524527
elif use_audio:

src/freeze

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ def create_inference_graph():
120120
hidden, output = self.thismodel(waveform, training=False)
121121
if FLAGS.loss=='exclusive':
122122
output = tf.nn.softmax(output)
123-
else:
123+
elif FLAGS.loss=='overlapped':
124124
output = tf.math.sigmoid(output)
125125
return hidden, output
126126

@@ -231,8 +231,8 @@ if __name__ == '__main__':
231231
'--loss',
232232
type=str,
233233
default='exclusive',
234-
choices=['exclusive', 'overlapped'],
235-
help='Sigmoid cross entropy is used for "overlapped" labels while softmax cross entropy is used for "exclusive" labels.')
234+
choices=['exclusive', 'overlapped', 'autoencoder'],
235+
help='Sigmoid cross entropy is used for "overlapped" or "autoencoder" labels while softmax cross entropy is used for "exclusive" labels.')
236236
parser.add_argument(
237237
'--labels_touse',
238238
type=str,

src/gui/controller.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1616,18 +1616,26 @@ def accuracy_succeeded(logdir, reftime):
16161616
logfile = os.path.join(logdir, 'accuracy.log')
16171617
if not logfile_succeeded(logfile, reftime):
16181618
return False
1619-
traindirs = list(filter(lambda x: os.path.isdir(os.path.join(logdir,x)) and \
1620-
not x.startswith('summaries_'), os.listdir(logdir)))
1621-
toplevelfiles = ["precision-recall.pdf",
1622-
"confusion-matrix.pdf",
1623-
"train-validation-loss.pdf",
1624-
"P-R-F1-average.pdf",
1625-
"P-R-F1-label.pdf",
1626-
"P-R-F1-model.pdf",
1627-
"PvR.pdf"]
1619+
with open(logfile) as fid:
1620+
for line in fid:
1621+
if "loss = " in line:
1622+
m=re.search('loss = (.+)',line)
1623+
loss = m.group(1)
1624+
toplevelfiles = ["train-validation-loss.pdf"]
1625+
if loss != 'autoencoder':
1626+
toplevelfiles.extend(["precision-recall.pdf",
1627+
"confusion-matrix.pdf",
1628+
"P-R-F1-average.pdf",
1629+
"P-R-F1-label.pdf",
1630+
"P-R-F1-model.pdf",
1631+
"PvR.pdf"])
16281632
for toplevelfile in toplevelfiles:
16291633
if not pdffile_succeeded(os.path.join(logdir, toplevelfile), reftime):
16301634
return False
1635+
if loss == 'autoencoder':
1636+
return True
1637+
traindirs = list(filter(lambda x: os.path.isdir(os.path.join(logdir,x)) and \
1638+
not x.startswith('summaries_'), os.listdir(logdir)))
16311639
one_fold_has_thresholds = False
16321640
for traindir in traindirs:
16331641
trainfiles = os.listdir(os.path.join(logdir,traindir))

src/gui/view.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1219,7 +1219,7 @@ def recordings_update():
12191219
wavfiles.append(wavfile)
12201220
for wavfile in wavfiles:
12211221
M.used_sounds.append({'file': list(os.path.split(wavfile)),
1222-
'ticks': [1, 1], 'kind': '', 'label': ' '})
1222+
'ticks': [1, 1], 'kind': '', 'label': ''})
12231223
elif M.dfs:
12241224
wavfiles = set()
12251225
kinds = kinds_touse.value.split(',')
@@ -2173,7 +2173,7 @@ def init(_bokeh_document):
21732173

21742174
loss = Select(title="loss", height=50, \
21752175
value=M.state['loss'], \
2176-
options=["exclusive", "overlapped"])
2176+
options=["exclusive", "overlapped", "autoencoder"])
21772177
loss.on_change('value', lambda a,o,n: C.generic_parameters_callback(''))
21782178

21792179
learning_rate = TextInput(value=M.state['learning_rate'], \

src/lib.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ def layout(nplots):
335335
return nrows, ncols
336336

337337

338-
def read_log(frompath, logfile):
338+
def read_log(frompath, logfile, loss='exclusive'):
339339
train_accuracy=[]; train_loss=[]; train_time=[]; train_step=[]
340340
validation_time=[]; validation_step=[]
341341
validation_precision=[]; validation_recall=[]
@@ -410,11 +410,14 @@ def read_log(frompath, logfile):
410410
conf_matrix_state=False
411411
confusion_string=""
412412
elif "Validation\n" in line:
413-
validation_precision.append(precision)
414-
validation_recall.append(recall)
415-
validation_precision_mean.append(precision_mean)
416-
validation_recall_mean.append(recall_mean)
417-
m=re.search('^([0-9.]+),([0-9]+),[0-9.]+ Validation$',line)
413+
m=re.search('^([0-9.]+),([0-9]+),([0-9.]+) Validation$',line)
414+
if loss != 'autoencoder':
415+
validation_precision.append(precision)
416+
validation_recall.append(recall)
417+
validation_precision_mean.append(precision_mean)
418+
validation_recall_mean.append(recall_mean)
419+
else:
420+
validation_recall_mean.append(float(m.group(3)))
418421
validation_time_value = float(m.group(1))
419422
if len(validation_time)>0 and \
420423
(validation_time_value+validation_restart_correction)<validation_time[-1]:
@@ -449,7 +452,7 @@ def read_log(frompath, logfile):
449452
#test_accuracy, \
450453

451454

452-
def read_logs(frompath):
455+
def read_logs(frompath, loss='exclusive'):
453456
train_accuracy={}; train_loss={}; train_time={}; train_step={}
454457
validation_precision={}; validation_recall={}
455458
validation_precision_mean={}; validation_recall_mean={}
@@ -474,7 +477,7 @@ def read_logs(frompath):
474477
labels_touse[model], label_counts[model], \
475478
nparameters_total[model], nparameters_finallayer[model], \
476479
batch_size[model], nlayers[model] = \
477-
read_log(frompath, logfile)
480+
read_log(frompath, logfile, loss)
478481
#test_accuracy[model], \
479482

480483
return train_accuracy, train_loss, train_time, train_step, \

0 commit comments

Comments (0)