Skip to content

Commit 9016139

Browse files
committed
fix selection of validation and test files
1 parent 52d0590 commit 9016139

2 files changed

Lines changed: 26 additions & 33 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1109,7 +1109,7 @@ To train one classifier with a single recording or set of recordings withheld
11091109
for validation, first click on `Generalize` and then `Omit All`. Use the `File
11101110
Browser` to select (1) specific WAV file(s), (2) a text file containing
11111111
a list of WAV file(s) (either comma separated or one per line), or (3) a
1112-
subdirectory within the `Ground Truth` folder. Finally press the `Validation
1112+
directory with WAV and annotated CSV files. Finally press the `Validation
11131113
Files` button and `DoIt!`.
11141114

11151115
To train multiple classifiers, each of which withholds a single recording in a

src/gui/controller.py

Lines changed: 25 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1032,38 +1032,31 @@ async def misses_actuate():
10321032
misses_succeeded(w, t)))
10331033
asyncio.create_task(actuate_finalize(threads, results, V.groundtruth_update))
10341034

1035-
def __validation_test_files(folder, comma):
1036-
csvfiles = glob.glob("**/*csv", root_dir=folder, recursive=True)
1037-
csvfiles = list(filter(lambda x: 'oldfiles-' not in x and \
1038-
'congruence-' not in x, csvfiles))
1039-
dfs = []
1040-
for csvfile in csvfiles:
1041-
if os.path.getsize(os.path.join(folder, csvfile)) > 0:
1042-
dfs.append(pd.read_csv(os.path.join(folder, csvfile), header=None, index_col=False))
1043-
if dfs:
1044-
df = pd.concat(dfs)
1045-
wavfiles = sorted(list(set(df.loc[df[3]=="annotated"][0])))
1046-
return [','.join(wavfiles)] if comma else list(wavfiles)
1047-
1048-
def _validation_test_files(files_string, comma=True):
1049-
basepath = os.path.commonprefix((V.groundtruth_folder.value.rstrip(os.sep),
1050-
files_string.rstrip(os.sep)))
1051-
if basepath.startswith(V.groundtruth_folder.value.rstrip(os.sep)):
1052-
return __validation_test_files(files_string, comma)
1053-
elif files_string:
1054-
lastfile = files_string.split(',')[-1]
1055-
ext = os.path.splitext(M.audio_read_strip_rec(lastfile))[1]
1056-
if ext in M.audio_read_exts():
1057-
return [files_string] if comma else files_string.split(',')
1058-
elif os.path.isdir(files_string):
1059-
with open(files_string, "r") as fid:
1060-
wavfiles = fid.readlines()
1061-
wavfiles = [x.strip() for x in wavfiles]
1062-
return [','.join(wavfiles)] if comma else wavfiles
1063-
else:
1064-
bokehlog.info("ERROR: invalid value for validation / test files.")
1065-
else:
1066-
return ['']
1035+
def _validation_test_files(list_string, comma=True):
1036+
wavfiles = []
1037+
for elt in list_string.split(','):
1038+
if os.path.isdir(elt):
1039+
csvfiles = glob.glob("**/*csv", root_dir=elt, recursive=True)
1040+
csvfiles = list(filter(lambda x: 'oldfiles-' not in x and \
1041+
'congruence-' not in x, csvfiles))
1042+
dfs = []
1043+
for csvfile in csvfiles:
1044+
if os.path.getsize(os.path.join(elt, csvfile)) > 0:
1045+
dfs.append(pd.read_csv(os.path.join(elt, csvfile), header=None, index_col=False))
1046+
if dfs:
1047+
df = pd.concat(dfs)
1048+
wavfiles.extend(sorted(list(set(df.loc[df[3]=="annotated"][0]))))
1049+
continue
1050+
file_norec = M.audio_read_strip_rec(elt)
1051+
if file_norec and os.path.splitext(file_norec)[1] in M.audio_read_exts():
1052+
wavfiles.append(elt)
1053+
continue
1054+
if os.path.isfile(elt):
1055+
with open(elt, "r") as fid:
1056+
wavfiles.extend(os.path.basename(x.strip()) for x in fid.readlines())
1057+
continue
1058+
bokehlog.info("ERROR: invalid value for validation / test files-- "+elt)
1059+
return [','.join(wavfiles)] if comma else list(wavfiles)
10671060

10681061
def _train_succeeded(logdir, kind, model, reftime):
10691062
train_dir = os.path.join(logdir, kind+"_"+model)

0 commit comments

Comments
 (0)