Skip to content

Commit 8d383a1

Browse files
committed
hoist units and scales to configuration.py
1 parent a4a3ebd commit 8d383a1

34 files changed

Lines changed: 1006 additions & 634 deletions

configuration.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,25 +38,32 @@
3838
# https://graphicdesign.stackexchange.com/questions/3682/where-can-i-find-a-large-palette-set-of-contrasting-colors-for-coloring-many-d
3939
gui_label_palette="('#0075dc','#993f00','#4c005c','#191919','#005c31','#2bce48','#ffcc99','#808080','#94ffb5','#8f7c00','#9dcc00','#c20088','#003380','#ffa405','#ffa8bb','#426600','#ff0010','#5ef1f2','#00998f','#e0ff66','#740aff','#990000','#ffff80','#ffff00','#ff5005')"
4040
gui_cluster_circle_color="#f0a3ff"
41+
gui_time_units="ms"
42+
gui_time_scale=0.001
43+
gui_freq_units="Hz"
44+
gui_freq_scale=1
4145
gui_snippets_colormap="Viridis256"
42-
gui_snippets_width_ms=40
46+
gui_snippets_width_sec=0.04
4347
gui_snippets_nx=10
4448
gui_snippets_ny=5
4549
gui_snippets_waveform=1 # comma-separated list of channels to display, or () if none
4650
gui_snippets_spectrogram=1 # comma-separated list of channels to display, or () if none
47-
gui_context_width_ms=400
48-
gui_context_offset_ms=0
51+
gui_context_time_units="sec"
52+
gui_context_time_scale=1
53+
gui_context_freq_units="kHz"
54+
gui_context_freq_scale=1000
55+
gui_context_width_sec=0.4
56+
gui_context_offset_sec=0
4957
gui_context_waveform=1 # comma-separated list of channels to display, or () if none
5058
gui_context_waveform_height_pix=150
5159
gui_context_spectrogram=1 # comma-separated list of channels to display, or () if none
5260
gui_context_spectrogram_height_pix=150
53-
gui_context_spectrogram_units="kHz" # one of mHz, Hz, kHz, or MHz
5461
gui_context_probability_height_pix=75
5562
gui_context_undo_proximity_pix=3
5663
gui_context_doubleclick_plugin="point"
5764
gui_spectrogram_colormap="Viridis256"
5865
gui_spectrogram_window="hann"
59-
gui_spectrogram_length_ms=10
66+
gui_spectrogram_length_sec=0.010
6067
gui_spectrogram_overlap=0.5
6168
gui_spectrogram_low_hz=0
6269
gui_spectrogram_high_hz=1250

src/activations

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@
2323
# save input, hidden, and output layer activations at the time points annotated in the test set
2424

2525
# e.g. activations \
26-
# --context_ms=204.8 \
27-
# --shiftby_ms=0.0 \
26+
# --context=204.8 \
27+
# --shiftby=0.0 \
2828
# --loss=exclusive \
2929
# --overlapped_prefix=not_ \
3030
# --video_findfile=same-basename \
@@ -34,14 +34,18 @@
3434
# --video_read_plugin=load-avi-mp4-mov \
3535
# --video_read_plugin_kwargs="{}" \
3636
# --model_architecture=convolutional \
37-
# --model_parameters='{"representation":"waveform", "window_ms":6.4, "stride_ms":1.6, "mel_dct":"7,7", "dropout":0.5, "kernel_sizes":5,3,3", last_conv_width":130, "nfeatures":"256,256,256", "dilate_after_layer":65535, "stride_after_layer":65535, "connection_type":"plain"}' \
37+
# --model_parameters='{"representation":"waveform", "window":6.4, "stride":1.6, "mel_dct":"7,7", "dropout":0.5, "kernel_sizes":5,3,3", last_conv_width":130, "nfeatures":"256,256,256", "dilate_after_layer":65535, "stride_after_layer":65535, "connection_type":"plain"}' \
3838
# --start_checkpoint=`pwd`/trained-classifier/train_1k/ckpt-50 \
3939
# --data_dir=`pwd`/groundtruth-data \
4040
# --labels_touse=mel-sine,mel-pulse,ambient,other \
4141
# --kinds_touse=annotated \
4242
# --testing_equalize_ratio=1000 \
4343
# --testing_max_sounds=10000 \
4444
# --batch_size=32 \
45+
# --time_units=ms \
46+
# --freq_units=Hz \
47+
# --time_scale=0.001 \
48+
# --freq_scale=1 \
4549
# --audio_tic_rate=5000 \
4650
# --audio_nchannels=1
4751
# --igpu=0
@@ -103,18 +107,22 @@ def main():
103107
nlabels = len(labels)
104108

105109
model_settings = {'nlabels': nlabels,
110+
'time_units': FLAGS.time_units,
111+
'freq_units': FLAGS.freq_units,
112+
'time_scale': FLAGS.time_scale,
113+
'freq_scale': FLAGS.freq_scale,
106114
'audio_tic_rate': FLAGS.audio_tic_rate,
107115
'audio_nchannels': FLAGS.audio_nchannels,
108116
'video_frame_rate': FLAGS.video_frame_rate,
109117
'video_frame_width': FLAGS.video_frame_width,
110118
'video_frame_height': FLAGS.video_frame_height,
111119
'video_channels': [int(x)-1 for x in FLAGS.video_channels.split(',')],
112120
'parallelize': 1,
113-
'context_ms': FLAGS.context_ms}
121+
'context': FLAGS.context}
114122

115123
audio_processor = data.AudioProcessor(
116124
FLAGS.data_dir,
117-
FLAGS.shiftby_ms,
125+
FLAGS.shiftby,
118126
FLAGS.labels_touse.split(','), FLAGS.kinds_touse.split(','),
119127
FLAGS.validation_percentage, FLAGS.validation_offset_percentage,
120128
FLAGS.validation_files.split(','),
@@ -136,7 +144,7 @@ def main():
136144
checkpoint = tf.train.Checkpoint(thismodel=thismodel)
137145
checkpoint.read(FLAGS.start_checkpoint).expect_partial()
138146

139-
time_shift_tics = int((FLAGS.shiftby_ms * FLAGS.audio_tic_rate) / 1000)
147+
time_shift_tics = int(FLAGS.shiftby * FLAGS.audio_tic_rate * FLAGS.time_scale)
140148

141149
testing_set_size = audio_processor.set_size('testing')
142150
if testing_set_size==0:
@@ -203,7 +211,7 @@ if __name__ == '__main__':
203211
Where to download the speech training data to.
204212
""")
205213
parser.add_argument(
206-
'--shiftby_ms',
214+
'--shiftby',
207215
type=float,
208216
default=100.0,
209217
help="""\
@@ -259,6 +267,26 @@ if __name__ == '__main__':
259267
type=float,
260268
default=0,
261269
help='Which wavs to use as a cross-validation set.')
270+
parser.add_argument(
271+
'--time_units',
272+
type=str,
273+
default="ms",
274+
help='Units of time',)
275+
parser.add_argument(
276+
'--freq_units',
277+
type=str,
278+
default="Hz",
279+
help='Units of frequency',)
280+
parser.add_argument(
281+
'--time_scale',
282+
type=float,
283+
default=0.001,
284+
help='This many seconds are in time_units',)
285+
parser.add_argument(
286+
'--freq_scale',
287+
type=float,
288+
default=1,
289+
help='This many Hertz are in freq_units',)
262290
parser.add_argument(
263291
'--audio_tic_rate',
264292
type=int,
@@ -290,7 +318,7 @@ if __name__ == '__main__':
290318
default='1',
291319
help='Comma-separated list of which color channels in the video to use',)
292320
parser.add_argument(
293-
'--context_ms',
321+
'--context',
294322
type=float,
295323
default=1000,
296324
help='Expected duration, in time_units, of the wavs',)

src/architecture-plugin.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,15 @@ def callback(n,M,V,C):
2929
_callback('a-bounded-value',M,V,C)
3030

3131
# a list of lists specifying the architecture-specific hyperparameters in the GUI
32-
model_parameters = [
33-
# [key, title in GUI, "" for textbox or [] for pull-down, default value, width, enable logic, callback, required]
34-
["my-simple-textbox", "h-parameter 1", "", "32", 1, [], None, True],
35-
["a-bounded-value", "can't be < 0", "", "3", 1, [], callback, True],
36-
["a-menu", "choose one", ["this","that"], "this", 1, [], None, True],
37-
["a-conditional-param", "that's parameter", "", "8", 1, ["a-menu",["that"]], None, True],
38-
["an-optional-param", "can be blank", "", "0.5", 1, [], None, False],
39-
]
32+
def model_parameters(time_units, freq_units, time_scale, freq_scale):
33+
return [
34+
# [key, title in GUI, "" for textbox or [] for pull-down, default value, width, enable logic, callback, required]
35+
["my-simple-textbox", "h-parameter 1", "", "32", 1, [], None, True],
36+
["a-bounded-value", "can't be < 0", "", "3", 1, [], callback, True],
37+
["a-menu", "choose one", ["this","that"], "this", 1, [], None, True],
38+
["a-conditional-param", "that's parameter", "", "8", 1, ["a-menu",["that"]], None, True],
39+
["an-optional-param", "can be blank", "", "0.5", 1, [], None, False],
40+
]
4041

4142
# define custom keras layers by sub-classing Layer and wrapping tf functions
4243
# call with MyLayer(arg1, arg2)(previous_layer) as usual

src/classify

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@
2323
# generate .wav files of per-label probabilities
2424

2525
# e.g. classify \
26-
# --context_ms=204.8 \
27-
# --shiftby_ms=0.0 \
26+
# --context=204.8 \
27+
# --shiftby=0.0 \
28+
# --time_scale=0.001 \
29+
# --audio_tic_rate=2500 \
2830
# --video_findfile=same-basename \
2931
# --audio_read_plugin=load-wav \
3032
# --audio_read_plugin_kwargs="{}" \
@@ -182,12 +184,12 @@ def main():
182184
len(video_channels)),
183185
dtype=np.float32)
184186

185-
context_samples = int(FLAGS.context_ms * data_sample_rate / 1000)
187+
context_samples = int(FLAGS.context * FLAGS.time_scale * data_sample_rate)
186188
stride_x_downsample_samples = (clip_window_samples - context_samples) // (FLAGS.parallelize-1)
187189
clip_stride_samples = stride_x_downsample_samples * FLAGS.parallelize
188190

189-
stride_x_downsample_ms = stride_x_downsample_samples / data_sample_rate * 1000
190-
npadding = round((FLAGS.context_ms/2 + FLAGS.shiftby_ms) / stride_x_downsample_ms)
191+
stride_x_downsample_sec = stride_x_downsample_samples / data_sample_rate
192+
npadding = round((FLAGS.context / 2 + FLAGS.shiftby) * FLAGS.time_scale / stride_x_downsample_sec)
191193
probability_list = [np.zeros((npadding, len(labels)), dtype=np.float32)]
192194

193195
# Inference along audio stream.
@@ -220,16 +222,16 @@ def main():
220222
inputs = tf.expand_dims(video_slice, 0)
221223
_,outputs = recognize_graph(inputs)
222224

223-
current_time_ms = np.round(data_offset_samples * 1000 / data_sample_rate).astype(int)
225+
current_time_sec = np.round(data_offset_samples / data_sample_rate).astype(int)
224226
if pad_len>0:
225227
discard_len = np.ceil(pad_len/stride_x_downsample_samples).astype(int)
226228
probability_list.append(np.array(outputs.numpy()[0,:-discard_len,:]))
227229
break
228230
else:
229231
probability_list.append(np.array(outputs.numpy()[0,:,:]))
230232

231-
sample_rate = round(1000/stride_x_downsample_ms)
232-
if sample_rate != 1000/stride_x_downsample_ms:
233+
sample_rate = round(1/stride_x_downsample_sec)
234+
if sample_rate != 1/stride_x_downsample_sec:
233235
print('WARNING: .wav files do not support fractional sampling rates!')
234236

235237
probability_matrix = np.concatenate(probability_list)
@@ -273,17 +275,22 @@ if __name__ == '__main__':
273275
choices=['exclusive', 'overlapped'],
274276
help='Sigmoid cross entropy is used for "overlapped" labels while softmax cross entropy is used for "exclusive" labels.')
275277
parser.add_argument(
276-
'--context_ms',
278+
'--context',
277279
type=float,
278280
default=1000,
279281
help='Length of each audio clip fed into model.')
280282
parser.add_argument(
281-
'--shiftby_ms',
283+
'--shiftby',
282284
type=float,
283285
default=100.0,
284286
help="""\
285287
Range to shift the training audio by in time.
286288
""")
289+
parser.add_argument(
290+
'--time_scale',
291+
type=float,
292+
default=0.001,
293+
help='This many seconds are in time_units',)
287294
parser.add_argument(
288295
'--audio_tic_rate',
289296
type=int,

src/congruence

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
# --topath=/groups/stern/sternlab/behavior/arthurb/groundtruth/kyriacou2017/congruence-20240718T091400 \
88
# --wavfiles=PS_20130625111709_ch3.wav,PS_20130625111709_ch7.wav \
99
# --portion=union \
10-
# --convolve_ms=0 \
10+
# --convolve_sec=0 \
1111
# --measure=label \
1212
# --nprobabilities=20 \
1313
# --audio_tic_rate=2500 \
@@ -144,7 +144,7 @@ def main():
144144
do_tic = FLAGS.measure=="both" or FLAGS.measure=="tic"
145145
do_label = FLAGS.measure=="both" or FLAGS.measure=="label"
146146

147-
convolve_tic = int(FLAGS.convolve_ms/2/1000*FLAGS.audio_tic_rate)
147+
convolve_tic = int(FLAGS.convolve_sec/2*FLAGS.audio_tic_rate)
148148

149149
wavdirs = {}
150150
def traverse(curdir):
@@ -689,7 +689,7 @@ if __name__ == "__main__":
689689
'--portion',
690690
type=str)
691691
parser.add_argument(
692-
'--convolve_ms',
692+
'--convolve_sec',
693693
type=float)
694694
parser.add_argument(
695695
'--measure',

0 commit comments

Comments
 (0)