2323# save input, hidden, and output layer activations at the time points annotated in the test set
2424
2525# e.g. activations \
26- # --context_ms =204.8 \
27- # --shiftby_ms =0.0 \
26+ # --context =204.8 \
27+ # --shiftby =0.0 \
2828# --loss=exclusive \
2929# --overlapped_prefix=not_ \
3030# --video_findfile=same-basename \
3434# --video_read_plugin=load-avi-mp4-mov \
3535# --video_read_plugin_kwargs="{}" \
3636# --model_architecture=convolutional \
37- # --model_parameters='{"representation":"waveform", "window_ms ":6.4, "stride_ms ":1.6, "mel_dct":"7,7", "dropout":0.5, "kernel_sizes":5,3,3", last_conv_width":130, "nfeatures":"256,256,256", "dilate_after_layer":65535, "stride_after_layer":65535, "connection_type":"plain"}' \
37+ # --model_parameters='{"representation":"waveform", "window ":6.4, "stride ":1.6, "mel_dct":"7,7", "dropout":0.5, "kernel_sizes":"5,3,3", "last_conv_width":130, "nfeatures":"256,256,256", "dilate_after_layer":65535, "stride_after_layer":65535, "connection_type":"plain"}' \
3838# --start_checkpoint=`pwd`/trained-classifier/train_1k/ckpt-50 \
3939# --data_dir=`pwd`/groundtruth-data \
4040# --labels_touse=mel-sine,mel-pulse,ambient,other \
4141# --kinds_touse=annotated \
4242# --testing_equalize_ratio=1000 \
4343# --testing_max_sounds=10000 \
4444# --batch_size=32 \
45+ # --time_units=ms \
46+ # --freq_units=Hz \
47+ # --time_scale=0.001 \
48+ # --freq_scale=1 \
4549# --audio_tic_rate=5000 \
4650# --audio_nchannels=1
4751# --igpu=0
@@ -103,18 +107,22 @@ def main():
103107 nlabels = len (labels )
104108
105109 model_settings = {'nlabels' : nlabels ,
110+ 'time_units' : FLAGS .time_units ,
111+ 'freq_units' : FLAGS .freq_units ,
112+ 'time_scale' : FLAGS .time_scale ,
113+ 'freq_scale' : FLAGS .freq_scale ,
106114 'audio_tic_rate' : FLAGS .audio_tic_rate ,
107115 'audio_nchannels' : FLAGS .audio_nchannels ,
108116 'video_frame_rate' : FLAGS .video_frame_rate ,
109117 'video_frame_width' : FLAGS .video_frame_width ,
110118 'video_frame_height' : FLAGS .video_frame_height ,
111119 'video_channels' : [int (x )- 1 for x in FLAGS .video_channels .split (',' )],
112120 'parallelize' : 1 ,
113- 'context_ms ' : FLAGS .context_ms }
121+ 'context ' : FLAGS .context }
114122
115123 audio_processor = data .AudioProcessor (
116124 FLAGS .data_dir ,
117- FLAGS .shiftby_ms ,
125+ FLAGS .shiftby ,
118126 FLAGS .labels_touse .split (',' ), FLAGS .kinds_touse .split (',' ),
119127 FLAGS .validation_percentage , FLAGS .validation_offset_percentage ,
120128 FLAGS .validation_files .split (',' ),
@@ -136,7 +144,7 @@ def main():
136144 checkpoint = tf .train .Checkpoint (thismodel = thismodel )
137145 checkpoint .read (FLAGS .start_checkpoint ).expect_partial ()
138146
139- time_shift_tics = int (( FLAGS .shiftby_ms * FLAGS .audio_tic_rate ) / 1000 )
147+ time_shift_tics = int (FLAGS .shiftby * FLAGS .audio_tic_rate * FLAGS . time_scale )
140148
141149 testing_set_size = audio_processor .set_size ('testing' )
142150 if testing_set_size == 0 :
@@ -203,7 +211,7 @@ if __name__ == '__main__':
203211 Where to download the speech training data to.
204212 """ )
205213 parser .add_argument (
206- '--shiftby_ms ' ,
214+ '--shiftby ' ,
207215 type = float ,
208216 default = 100.0 ,
209217 help = """\
@@ -259,6 +267,26 @@ if __name__ == '__main__':
259267 type = float ,
260268 default = 0 ,
261269 help = 'Which wavs to use as a cross-validation set.' )
270+ parser .add_argument (
271+ '--time_units' ,
272+ type = str ,
273+ default = "ms" ,
274+ help = 'Units of time' ,)
275+ parser .add_argument (
276+ '--freq_units' ,
277+ type = str ,
278+ default = "Hz" ,
279+ help = 'Units of frequency' ,)
280+ parser .add_argument (
281+ '--time_scale' ,
282+ type = float ,
283+ default = 0.001 ,
284+ help = 'This many seconds are in time_units' ,)
285+ parser .add_argument (
286+ '--freq_scale' ,
287+ type = float ,
288+ default = 1.0 ,
289+ help = 'This many Hertz are in freq_units' ,)
262290 parser .add_argument (
263291 '--audio_tic_rate' ,
264292 type = int ,
@@ -290,7 +318,7 @@ if __name__ == '__main__':
290318 default = '1' ,
291319 help = 'Comma-separated list of which color channels in the video to use' ,)
292320 parser .add_argument (
293- '--context_ms ' ,
321+ '--context ' ,
294322 type = float ,
295323 default = 1000 ,
296324 help = 'Expected duration in time_units of the wavs' ,)
0 commit comments