From c68420e70d5f0132eaa4975bed8cdd51379c79c1 Mon Sep 17 00:00:00 2001 From: Seyed Yahya Shirazi Date: Wed, 13 May 2026 06:33:26 -0700 Subject: [PATCH] phase3: iclabel classification + non-brain IC flagging Phase 3 of the HBN ERSP epic. Per-subject pipeline: load Phase 2 .set -> pop_iclabel('default') -> flag ICs where brain probability < 0.69 (locked threshold) -> ic-classes bar + brain-IC topo PNG figures -> save .set checkpoint -> qa_iclabel.csv row src/matlab/phase3_iclabel.m (entrypoint) - BrainThreshold default 0.69 (mustBeInRange 0..1). IcLabelVersion 'default' (locked). MinBrainIcs default 5; subjects below threshold get status 'ok_low_brain' and are flagged for Phase 6 escalation per .context/research.md L92-94 watch list. - Flag, not delete: writes EEG.reject.gcompreject so Phase 4 can still inspect dropped ICs. src/matlab/+hbn/ helpers - run_iclabel.m : wraps pop_iclabel. Validates that EEG.etc.ic_classification.ICLabel is populated on return (the pop_iclabel docstring claims 'ic_classifications' (plural) but the code writes 'ic_classification' (singular); fixed locally with a clear assertion). - flag_non_brain_ics.m : validates class ordering (Brain, Muscle, Eye, Heart, Line Noise, Channel Noise, Other); writes EEG.reject.gcompreject; returns per-class counts (argmax-based). - save_ic_class_figure.m: two-panel PNG with per-class bar (brain split into kept vs dropped) plus brain-prob scatter with the 0.69 threshold line. - save_brain_ic_topo_figure.m: kept brain IC topographies on a tight tile grid. Uses the snapshot-handles pattern from Phase 2 so pop_topoplot's own figure gets captured (not a pre-allocated empty one). Emits a labelled placeholder PNG when no brain IC survives, instead of silently erroring on an unrecoverable empty find(). - write_qa_iclabel_csv.m: schema: participant_id, status, n_components, brain_count, muscle_count, eye_count, heart_count, line_noise_count, channel_noise_count, other_count, brain_threshold, iclabel_version, duration_s, error_message. tests/matlab/test_phase3_smoke.m - Chains phase1 -> phase2 (IcaMethod=runica, MaxIter=100) -> phase3 on the fixture. Asserts classifications attached, gcompreject length matches IC count, qa_iclabel.csv row sane, params.json schema complete. Allows 'ok' or 'ok_low_brain' status (the 60s/100-iter wiring smoke is structurally below the brain floor). tests/matlab/run_all_tests.m: adds test_phase3_smoke. scripts/run_phase3_three_subjects.m: production runner. Regenerates Phase 1+2 if checkpoints are missing (.set/.fdt are gitignored). Refs #1, refs #4. --- scripts/run_phase3_three_subjects.m | 54 +++++++ src/matlab/+hbn/flag_non_brain_ics.m | 60 +++++++ src/matlab/+hbn/run_iclabel.m | 40 +++++ src/matlab/+hbn/save_brain_ic_topo_figure.m | 60 +++++++ src/matlab/+hbn/save_ic_class_figure.m | 66 ++++++++ src/matlab/+hbn/write_qa_iclabel_csv.m | 83 ++++++++++ src/matlab/phase3_iclabel.m | 167 ++++++++++++++++++++ tests/matlab/run_all_tests.m | 3 +- tests/matlab/test_phase3_smoke.m | 101 ++++++++++++ 9 files changed, 633 insertions(+), 1 deletion(-) create mode 100644 scripts/run_phase3_three_subjects.m create mode 100644 src/matlab/+hbn/flag_non_brain_ics.m create mode 100644 src/matlab/+hbn/run_iclabel.m create mode 100644 src/matlab/+hbn/save_brain_ic_topo_figure.m create mode 100644 src/matlab/+hbn/save_ic_class_figure.m create mode 100644 src/matlab/+hbn/write_qa_iclabel_csv.m create mode 100644 src/matlab/phase3_iclabel.m create mode 100644 tests/matlab/test_phase3_smoke.m diff --git a/scripts/run_phase3_three_subjects.m b/scripts/run_phase3_three_subjects.m new file mode 100644 index 0000000..c0b0f7c --- /dev/null +++ b/scripts/run_phase3_three_subjects.m @@ -0,0 +1,54 @@ +function run_phase3_three_subjects +%RUN_PHASE3_THREE_SUBJECTS Run phase3_iclabel on the 3 Phase 2 subjects. +% Requires derivatives/amica/sub-*/eeg/*_desc-amica_eeg.set on disk. +% If those checkpoints are missing (they are gitignored), regenerates +% Phase 1 and Phase 2 from scratch first. +% +% Invoked from the worktree root: +% matlab -batch "addpath('scripts'); run_phase3_three_subjects()" + + eeglab_path = '/Users/yahya/Documents/git/eeg/eeglab'; + bids_root = '/Volumes/S1/Datasets/HBN/L100/R3_L100_bdf'; + + script_dir = fileparts(mfilename('fullpath')); + repo_root = fileparts(script_dir); + cd(repo_root); + + addpath(eeglab_path); + evalc('eeglab nogui'); + addpath(genpath(fullfile(repo_root, 'src', 'matlab'))); + + fprintf('[phase3-3subj] cwd: %s\n', pwd); + fprintf('[phase3-3subj] starting at %s\n', datestr(now, 'yyyy-mm-dd HH:MM:SS')); %#ok + + preprocDir = fullfile(repo_root, 'derivatives', 'preproc'); + amicaDir = fullfile(repo_root, 'derivatives', 'amica'); + iclabelDir = fullfile(repo_root, 'derivatives', 'iclabel'); + + amicaSets = dir(fullfile(amicaDir, 'sub-*', 'eeg', '*_desc-amica_eeg.set')); + if isempty(amicaSets) + fprintf('[phase3-3subj] no Phase 2 checkpoints on disk; regenerating Phase 1 + 2 ...\n'); + preprocSets = dir(fullfile(preprocDir, 'sub-*', 'eeg', '*_desc-clean_eeg.set')); + if isempty(preprocSets) + phase1_preprocess( ... + 'BidsRoot', bids_root, ... + 'OutDir', preprocDir, ... + 'SmokeSubjectCount', 3); + end + phase2_amica( ... + 'PreprocDir', preprocDir, ... + 'OutDir', amicaDir); + else + fprintf('[phase3-3subj] found %d Phase 2 checkpoint(s); skipping regenerate\n', ... + numel(amicaSets)); + end + + fprintf('[phase3-3subj] phase3_iclabel starting at %s\n', ... + datestr(now, 'yyyy-mm-dd HH:MM:SS')); %#ok + phase3_iclabel( ... + 'AmicaDir', amicaDir, ... + 'OutDir', iclabelDir); + + fprintf('[phase3-3subj] finished at %s\n', ... + datestr(now, 'yyyy-mm-dd HH:MM:SS')); %#ok +end diff --git a/src/matlab/+hbn/flag_non_brain_ics.m b/src/matlab/+hbn/flag_non_brain_ics.m new file mode 100644 index 0000000..c7da6f2 --- /dev/null +++ b/src/matlab/+hbn/flag_non_brain_ics.m @@ -0,0 +1,60 @@ +function [EEG, counts] = flag_non_brain_ics(EEG, brainThreshold) +%FLAG_NON_BRAIN_ICS Flag ICs whose brain probability is below threshold. +% +% [EEG, counts] = hbn.flag_non_brain_ics(EEG, brainThreshold) +% writes EEG.reject.gcompreject (1 x N_ic logical) with `true` for +% ICs to drop (those with ICLabel brain probability < brainThreshold). +% Returns a struct with per-class IC counts based on the argmax of +% the classification matrix. +% +% Flagging rather than deleting keeps the underlying IC structure +% intact, so downstream phases can inspect rejected ICs without +% re-running ICA. The pre-registered brain threshold for this +% project is 0.69 (see .context/ideas.md). + + arguments + EEG (1, 1) struct + brainThreshold (1, 1) double {mustBeInRange(brainThreshold, 0, 1)} + end + + if ~isfield(EEG, 'etc') || ~isfield(EEG.etc, 'ic_classification') || ... + ~isfield(EEG.etc.ic_classification, 'ICLabel') + error("hbn:phase3:flag:no_classifications", ... + "EEG.etc.ic_classification.ICLabel missing; run hbn.run_iclabel first."); + end + + probs = EEG.etc.ic_classification.ICLabel.classifications; + classes = EEG.etc.ic_classification.ICLabel.classes; + + expected_classes = {'Brain', 'Muscle', 'Eye', 'Heart', 'Line Noise', 'Channel Noise', 'Other'}; + if numel(classes) ~= numel(expected_classes) + error("hbn:phase3:flag:class_count_mismatch", ... + "Expected %d ICLabel classes, got %d", ... + numel(expected_classes), numel(classes)); + end + for k = 1:numel(expected_classes) + if ~strcmpi(classes{k}, expected_classes{k}) + error("hbn:phase3:flag:class_order_mismatch", ... + "Class %d expected '%s', got '%s'", ... + k, expected_classes{k}, classes{k}); + end + end + + brain_probs = probs(:, 1); + flag = brain_probs < brainThreshold; + + if ~isfield(EEG, 'reject') || isempty(EEG.reject) + EEG.reject = struct(); + end + EEG.reject.gcompreject = flag(:)'; + + [~, argmax] = max(probs, [], 2); + counts = struct( ... + 'brain_count', sum(argmax == 1 & ~flag), ... + 'muscle_count', sum(argmax == 2), ... + 'eye_count', sum(argmax == 3), ... + 'heart_count', sum(argmax == 4), ... + 'line_noise_count', sum(argmax == 5), ... + 'channel_noise_count', sum(argmax == 6), ... + 'other_count', sum(argmax == 7)); +end diff --git a/src/matlab/+hbn/run_iclabel.m b/src/matlab/+hbn/run_iclabel.m new file mode 100644 index 0000000..3023a11 --- /dev/null +++ b/src/matlab/+hbn/run_iclabel.m @@ -0,0 +1,40 @@ +function EEG = run_iclabel(EEG, version) +%RUN_ICLABEL Classify ICs via ICLabel and attach classifications to EEG. +% +% EEG = hbn.run_iclabel(EEG, version) runs pop_iclabel and returns the +% EEG struct with classifications stored at +% EEG.etc.ic_classification.ICLabel.classifications (N_ic x 7 matrix +% of probabilities) and EEG.etc.ic_classification.ICLabel.classes +% (cell array of class names: brain, muscle, eye, heart, line_noise, +% channel_noise, other). +% +% `version` is one of "default", "lite", "beta". The locked decision +% is "default" (validated in the original ICLabel publication). + + arguments + EEG (1, 1) struct + version (1, 1) string {mustBeMember(version, ["default", "lite", "beta"])} = "default" + end + + if exist('pop_iclabel', 'file') ~= 2 + error("hbn:phase3:iclabel:plugin_missing", ... + "pop_iclabel not on path; install the ICLabel plugin via EEGLAB."); + end + if isempty(EEG.icaweights) + error("hbn:phase3:iclabel:no_weights", ... + "EEG has no IC weights; run Phase 2 first."); + end + + try + EEG = pop_iclabel(EEG, char(version)); + catch ME + error("hbn:phase3:iclabel:run_failed", ... + "pop_iclabel failed: %s", ME.message); + end + + if ~isfield(EEG, 'etc') || ~isfield(EEG.etc, 'ic_classification') || ... + ~isfield(EEG.etc.ic_classification, 'ICLabel') + error("hbn:phase3:iclabel:no_output", ... + "pop_iclabel returned without populating ic_classification.ICLabel"); + end +end diff --git a/src/matlab/+hbn/save_brain_ic_topo_figure.m b/src/matlab/+hbn/save_brain_ic_topo_figure.m new file mode 100644 index 0000000..885b364 --- /dev/null +++ b/src/matlab/+hbn/save_brain_ic_topo_figure.m @@ -0,0 +1,60 @@ +function pngPath = save_brain_ic_topo_figure(EEG, outDir, subjId) +%SAVE_BRAIN_IC_TOPO_FIGURE Render the kept brain ICs as a topo montage. +% +% pngPath = hbn.save_brain_ic_topo_figure(EEG, outDir, subjId) +% writes /_brain-ic-topos.png with one tile per +% surviving brain IC (gcompreject==false and argmax class == Brain). +% If no brain IC survives, writes a placeholder text PNG so the file +% exists for downstream QA but signals the issue. + + arguments + EEG (1, 1) struct + outDir (1, 1) string + subjId (1, 1) string + end + + if ~isfolder(outDir); mkdir(outDir); end + probs = EEG.etc.ic_classification.ICLabel.classifications; + flag = logical(EEG.reject.gcompreject(:)); + [~, argmax] = max(probs, [], 2); + brainIcs = find(argmax == 1 & ~flag); + + pngPath = fullfile(outDir, sprintf("%s_brain-ic-topos.png", subjId)); + + if isempty(brainIcs) + fig = figure('Visible', 'off', 'Position', [100 100 800 400]); + cleaner = onCleanup(@() close(fig)); %#ok + axis off; + text(0.5, 0.5, sprintf('%s: no brain ICs above threshold', subjId), ... + 'HorizontalAlignment', 'center', 'FontSize', 18, 'Interpreter', 'none'); + exportgraphics(fig, pngPath, 'Resolution', 150); + return; + end + + handlesBefore = findall(groot, 'Type', 'figure'); + + nPlot = numel(brainIcs); + rows = ceil(sqrt(nPlot)); + cols = ceil(nPlot / rows); + pop_topoplot(EEG, 0, brainIcs', ... + sprintf('%s brain ICs (%d kept)', subjId, nPlot), ... + [rows cols], 0, 'electrodes', 'off'); + + handlesAfter = findall(groot, 'Type', 'figure'); + newHandles = setdiff(handlesAfter, handlesBefore); + if isempty(newHandles) + error("hbn:phase3:brain_topo:no_figure", ... + "pop_topoplot did not create a figure for %s", subjId); + end + fig = newHandles(end); + cleaner = onCleanup(@() close_if_valid(newHandles)); %#ok + + set(fig, 'Visible', 'off'); + exportgraphics(fig, pngPath, 'Resolution', 150); +end + +function close_if_valid(handles) + for h = handles(:)' + if isgraphics(h); close(h); end + end +end diff --git a/src/matlab/+hbn/save_ic_class_figure.m b/src/matlab/+hbn/save_ic_class_figure.m new file mode 100644 index 0000000..0c5c9b7 --- /dev/null +++ b/src/matlab/+hbn/save_ic_class_figure.m @@ -0,0 +1,66 @@ +function pngPath = save_ic_class_figure(EEG, outDir, subjId) +%SAVE_IC_CLASS_FIGURE Bar chart of IC class counts and rejection flags. +% +% Renders /_ic-classes.png with: +% - top: stacked bar of per-class IC counts based on argmax of +% ICLabel classification probabilities +% - bottom: scatter of brain probability vs IC index, with the +% rejection threshold drawn as a horizontal line + + arguments + EEG (1, 1) struct + outDir (1, 1) string + subjId (1, 1) string + end + + if ~isfolder(outDir); mkdir(outDir); end + probs = EEG.etc.ic_classification.ICLabel.classifications; + classes = EEG.etc.ic_classification.ICLabel.classes; + + flag = false(size(probs, 1), 1); + if isfield(EEG, 'reject') && isfield(EEG.reject, 'gcompreject') + flag = logical(EEG.reject.gcompreject(:)); + end + [~, argmax] = max(probs, [], 2); + + counts = zeros(numel(classes), 1); + for k = 1:numel(classes) + counts(k) = sum(argmax == k); + end + brain_kept = sum(argmax == 1 & ~flag); + brain_dropped = sum(argmax == 1 & flag); + + fig = figure('Visible', 'off', 'Position', [100 100 1000 700]); + cleaner = onCleanup(@() close(fig)); %#ok + + subplot(2, 1, 1); + bar_data = counts; + bar_data(1) = brain_kept; + extra = brain_dropped; + hold on; + bar(1:numel(classes), bar_data, 'FaceColor', [0.2 0.6 0.2]); + if extra > 0 + bar(1, brain_kept + extra, 'FaceColor', [0.7 0.2 0.2], 'FaceAlpha', 0.4); + bar(1, brain_kept, 'FaceColor', [0.2 0.6 0.2]); + end + set(gca, 'XTick', 1:numel(classes), 'XTickLabel', classes); + ylabel('IC count'); + title(sprintf('%s, IC class counts (brain kept: %d, brain dropped: %d)', ... + subjId, brain_kept, brain_dropped), 'Interpreter', 'none'); + grid on; + + subplot(2, 1, 2); + nIc = size(probs, 1); + hold on; + scatter(1:nIc, probs(:, 1), 36, 'filled', 'MarkerFaceColor', [0.2 0.4 0.8]); + yline(0.69, '--', 'brain threshold 0.69'); + xlabel('IC index'); + ylabel('Brain probability'); + ylim([0 1]); + xlim([0 nIc + 1]); + grid on; + title('Brain probability per IC'); + + pngPath = fullfile(outDir, sprintf("%s_ic-classes.png", subjId)); + exportgraphics(fig, pngPath, 'Resolution', 150); +end diff --git a/src/matlab/+hbn/write_qa_iclabel_csv.m b/src/matlab/+hbn/write_qa_iclabel_csv.m new file mode 100644 index 0000000..4fa0ba1 --- /dev/null +++ b/src/matlab/+hbn/write_qa_iclabel_csv.m @@ -0,0 +1,83 @@ +function csvPath = write_qa_iclabel_csv(outDir, row) +%WRITE_QA_ICLABEL_CSV Append a QA row to qa_iclabel.csv. +% Columns: participant_id, status, n_components, brain_count, +% muscle_count, eye_count, heart_count, line_noise_count, +% channel_noise_count, other_count, brain_threshold, +% iclabel_version, duration_s, error_message. +% +% status = "ok" when brain_count >= MinBrainIcs, +% "ok_low_brain" when below floor, +% "failed_" on per-subject failure. + + arguments + outDir (1, 1) string + row struct + end + if ~isfolder(outDir); mkdir(outDir); end + csvPath = fullfile(outDir, "qa_iclabel.csv"); + + header = ["participant_id", "status", "n_components", "brain_count", ... + "muscle_count", "eye_count", "heart_count", "line_noise_count", ... + "channel_noise_count", "other_count", "brain_threshold", ... + "iclabel_version", "duration_s", "error_message"]; + needsHeader = ~isfile(csvPath); + + fid = fopen(csvPath, "a"); + if fid < 0 + error("hbn:write_qa_iclabel_csv:open_failed", "could not open %s", csvPath); + end + cleanup = onCleanup(@() fclose(fid)); %#ok + + if needsHeader + fprintf(fid, "%s\n", strjoin(header, ",")); + end + + cells = [ ... + csv_escape(string(field_or_default(row, "participant_id", ""))), ... + csv_escape(string(field_or_default(row, "status", "unknown"))), ... + numeric_cell(field_or_default(row, "n_components", NaN)), ... + numeric_cell(field_or_default(row, "brain_count", NaN)), ... + numeric_cell(field_or_default(row, "muscle_count", NaN)), ... + numeric_cell(field_or_default(row, "eye_count", NaN)), ... + numeric_cell(field_or_default(row, "heart_count", NaN)), ... + numeric_cell(field_or_default(row, "line_noise_count", NaN)), ... + numeric_cell(field_or_default(row, "channel_noise_count", NaN)), ... + numeric_cell(field_or_default(row, "other_count", NaN)), ... + numeric_cell(field_or_default(row, "brain_threshold", NaN)), ... + csv_escape(string(field_or_default(row, "iclabel_version", ""))), ... + numeric_cell(field_or_default(row, "duration_s", NaN)), ... + csv_escape(string(field_or_default(row, "error_message", "")))]; + fprintf(fid, "%s\n", strjoin(cells, ",")); +end + +function v = field_or_default(s, name, default) + if isfield(s, name) + v = s.(name); + else + v = default; + end +end + +function c = numeric_cell(x) + if ~isnumeric(x) + error("hbn:write_qa_iclabel_csv:non_numeric", ... + "numeric_cell expected numeric, got %s", class(x)); + end + if isnan(x) + c = ""; + elseif x == floor(x) + c = sprintf("%d", x); + else + c = sprintf("%.4g", x); + end +end + +function out = csv_escape(s) + s = string(s); + needs = contains(s, ",") || contains(s, '"') || contains(s, newline); + if needs + out = """" + replace(s, """", """""") + """"; + else + out = s; + end +end diff --git a/src/matlab/phase3_iclabel.m b/src/matlab/phase3_iclabel.m new file mode 100644 index 0000000..cf2512d --- /dev/null +++ b/src/matlab/phase3_iclabel.m @@ -0,0 +1,167 @@ +function paramsPath = phase3_iclabel(opts) +%PHASE3_ICLABEL Classify ICs with ICLabel and flag non-brain components. +% paramsPath = phase3_iclabel() runs on every Phase 2 checkpoint under +% the default AmicaDir. +% paramsPath = phase3_iclabel(Name=Value,...) overrides defaults. +% +% Pipeline per subject: +% load Phase 2 .set checkpoint +% -> pop_iclabel('default') (classifications) +% -> flag ICs where brain probability < BrainThreshold (default 0.69) +% -> save IC class bar figure + brain-IC topo subset +% -> save checkpoint .set with classifications + flag +% -> qa_iclabel.csv row +% +% The locked decision per .context/ideas.md is to drop ICs whose brain +% class probability is below 0.69. We flag rather than physically +% remove so Phase 4 (epoching) can still inspect the dropped ICs if +% needed; flags live in EEG.reject.gcompreject and the rich +% classifications stay in EEG.etc.ic_classification.ICLabel. +% +% Outputs (rooted at opts.OutDir, default "derivatives/iclabel"): +% params.json +% qa_iclabel.csv +% sub-/eeg/sub-_task-_desc-iclabel_eeg.set +% sub-/figures/sub-_ic-classes.png +% sub-/figures/sub-_brain-ic-topos.png +% +% Name-value arguments: +% AmicaDir (1,1) string default "derivatives/amica" +% OutDir (1,1) string default "derivatives/iclabel" +% Task (1,1) string default "ThePresent" +% Subjects (1,:) string default string.empty +% BrainThreshold (1,1) double default 0.69 +% IcLabelVersion (1,1) string default "default" +% MinBrainIcs (1,1) double default 5 (subjects below get +% status="ok_low_brain") + + arguments + opts.AmicaDir (1, 1) string = "derivatives/amica" + opts.OutDir (1, 1) string = "derivatives/iclabel" + opts.Task (1, 1) string = "ThePresent" + opts.Subjects (1, :) string = string.empty + opts.BrainThreshold (1, 1) double {mustBeInRange(opts.BrainThreshold, 0, 1)} = 0.69 + opts.IcLabelVersion (1, 1) string {mustBeMember(opts.IcLabelVersion, ["default", "lite", "beta"])} = "default" + opts.MinBrainIcs (1, 1) double {mustBeNonnegative, mustBeInteger} = 5 + end + + if ~isfolder(opts.OutDir); mkdir(opts.OutDir); end + qaPath = fullfile(opts.OutDir, "qa_iclabel.csv"); + if isfile(qaPath); delete(qaPath); end + + setFiles = dir(fullfile(opts.AmicaDir, "sub-*", "eeg", "*_desc-amica_eeg.set")); + if isempty(setFiles) + error("hbn:phase3:no_inputs", ... + "No Phase 2 checkpoints found under %s/sub-*/eeg/*_desc-amica_eeg.set", ... + opts.AmicaDir); + end + + subjects_on_disk = strings(numel(setFiles), 1); + for k = 1:numel(setFiles) + parts = strsplit(setFiles(k).name, "_"); + subjects_on_disk(k) = parts(1); + end + [~, ord] = sort(subjects_on_disk); + setFiles = setFiles(ord); + subjects_on_disk = subjects_on_disk(ord); + + if ~isempty(opts.Subjects) + keep = ismember(subjects_on_disk, opts.Subjects); + setFiles = setFiles(keep); + subjects_on_disk = subjects_on_disk(keep); + if isempty(setFiles) + error("hbn:phase3:no_subjects", ... + "After filtering to Subjects=[%s], no checkpoints remain.", ... + strjoin(opts.Subjects, ", ")); + end + end + fprintf("[phase3] %d Phase 2 checkpoint(s) to classify\n", numel(setFiles)); + + nOk = 0; + nFailed = 0; + for i = 1:numel(setFiles) + subjId = subjects_on_disk(i); + fprintf("[phase3] (%d/%d) %s ...\n", i, numel(setFiles), subjId); + tStart = tic; + try + EEG = pop_loadset('filename', setFiles(i).name, 'filepath', setFiles(i).folder); + qaRow = process_one_subject(EEG, subjId, opts); + qaRow.participant_id = subjId; + qaRow.duration_s = toc(tStart); + qaRow.error_message = ""; + hbn.write_qa_iclabel_csv(opts.OutDir, qaRow); + nOk = nOk + 1; + catch ME + nFailed = nFailed + 1; + stage = extract_stage_from_error(ME); + warning("hbn:phase3:subject_failed", ... + "subject %s failed at stage %s: %s", subjId, stage, ME.message); + hbn.write_qa_iclabel_csv(opts.OutDir, struct( ... + 'participant_id', subjId, ... + 'status', "failed_" + stage, ... + 'n_components', NaN, ... + 'brain_count', NaN, ... + 'muscle_count', NaN, ... + 'eye_count', NaN, ... + 'heart_count', NaN, ... + 'line_noise_count', NaN, ... + 'channel_noise_count', NaN, ... + 'other_count', NaN, ... + 'brain_threshold', opts.BrainThreshold, ... + 'iclabel_version', opts.IcLabelVersion, ... + 'duration_s', toc(tStart), ... + 'error_message', ME.message)); + end + end + fprintf("[phase3] done. ok=%d failed=%d\n", nOk, nFailed); + + paramsPath = hbn.write_params_json(opts.OutDir, opts, struct( ... + 'n_subjects_input', numel(setFiles), ... + 'n_subjects_ok', nOk, ... + 'n_subjects_failed', nFailed)); +end + +function qaRow = process_one_subject(EEG, subjId, opts) + out_eeg_dir = fullfile(opts.OutDir, subjId, "eeg"); + out_fig_dir = fullfile(opts.OutDir, subjId, "figures"); + for d = [out_eeg_dir, out_fig_dir] + if ~isfolder(d); mkdir(d); end + end + + EEG = hbn.run_iclabel(EEG, opts.IcLabelVersion); + + [EEG, classCounts] = hbn.flag_non_brain_ics(EEG, opts.BrainThreshold); + + hbn.save_ic_class_figure(EEG, out_fig_dir, subjId); + hbn.save_brain_ic_topo_figure(EEG, out_fig_dir, subjId); + + out_set = sprintf("%s_task-%s_desc-iclabel_eeg.set", subjId, opts.Task); + pop_saveset(EEG, 'filename', char(out_set), 'filepath', char(out_eeg_dir)); + + status = "ok"; + if classCounts.brain_count < opts.MinBrainIcs + status = "ok_low_brain"; + end + + qaRow = struct( ... + 'status', status, ... + 'n_components', size(EEG.icaweights, 1), ... + 'brain_count', classCounts.brain_count, ... + 'muscle_count', classCounts.muscle_count, ... + 'eye_count', classCounts.eye_count, ... + 'heart_count', classCounts.heart_count, ... + 'line_noise_count', classCounts.line_noise_count, ... + 'channel_noise_count', classCounts.channel_noise_count, ... + 'other_count', classCounts.other_count, ... + 'brain_threshold', opts.BrainThreshold, ... + 'iclabel_version', opts.IcLabelVersion); +end + +function stage = extract_stage_from_error(ME) + parts = split(string(ME.identifier), ':'); + if numel(parts) >= 3 && parts(1) == "hbn" + stage = parts(3); + else + stage = "unknown"; + end +end diff --git a/tests/matlab/run_all_tests.m b/tests/matlab/run_all_tests.m index e0f70be..db4f560 100644 --- a/tests/matlab/run_all_tests.m +++ b/tests/matlab/run_all_tests.m @@ -8,7 +8,8 @@ @test_write_qa_channels_csv, ... @test_phase1_no_subjects, ... @test_phase1_smoke, ... - @test_phase2_smoke}; + @test_phase2_smoke, ... + @test_phase3_smoke}; names = strings(numel(tests), 1); passed = false(numel(tests), 1); diff --git a/tests/matlab/test_phase3_smoke.m b/tests/matlab/test_phase3_smoke.m new file mode 100644 index 0000000..c475105 --- /dev/null +++ b/tests/matlab/test_phase3_smoke.m @@ -0,0 +1,101 @@ +function test_phase3_smoke +%TEST_PHASE3_SMOKE Real-data smoke test for Phase 3 on a single subject. +% Chains phase1_preprocess -> phase2_amica (IcaMethod=runica) -> +% phase3_iclabel on the fixture. Asserts: +% - ICLabel classifications attached to EEG.etc.ic_classification +% - EEG.reject.gcompreject populated (length matches IC count) +% - qa_iclabel.csv has one row with consistent per-class counts +% - ic-classes and brain-ic-topos PNGs land in figures/ +% - params.json carries BrainThreshold + IcLabelVersion + tool versions +% +% This is a wiring smoke test. IcaMethod=runica + MaxIter=100 means +% the underlying decomposition is poor; do NOT interpret the brain_count +% from this test as a quality signal. The 3-subject real-data run on +% AMICA + 2000 iter is the place to read absolute brain-IC numbers. + + bidsRoot = test_bids_root(); + + testOut = string(tempname); + cleaner = onCleanup(@() rmdir_if_exists(testOut)); %#ok + preprocDir = fullfile(testOut, "preproc"); + amicaDir = fullfile(testOut, "amica"); + iclabelDir = fullfile(testOut, "iclabel"); + + phase1_preprocess( ... + BidsRoot = bidsRoot, ... + OutDir = preprocDir, ... + SmokeSubjectCount = 1); + + phase2_amica( ... + PreprocDir = preprocDir, ... + OutDir = amicaDir, ... + MaxIter = 100, ... + DoReject = false, ... + IcaMethod = "runica"); + + paramsPath = phase3_iclabel( ... + AmicaDir = amicaDir, ... + OutDir = iclabelDir); + + assert(isfile(paramsPath), "params.json missing at %s", paramsPath); + qaPath = fullfile(iclabelDir, "qa_iclabel.csv"); + assert(isfile(qaPath), "qa_iclabel.csv missing"); + + setFiles = dir(fullfile(iclabelDir, "sub-*", "eeg", "*_desc-iclabel_eeg.set")); + assert(numel(setFiles) == 1, ... + "expected 1 ICLabel checkpoint, got %d", numel(setFiles)); + + classPng = dir(fullfile(iclabelDir, "sub-*", "figures", "*_ic-classes.png")); + brainPng = dir(fullfile(iclabelDir, "sub-*", "figures", "*_brain-ic-topos.png")); + assert(numel(classPng) == 1, "expected 1 ic-classes PNG, got %d", numel(classPng)); + assert(numel(brainPng) == 1, "expected 1 brain-ic-topos PNG, got %d", numel(brainPng)); + + qa = readtable(qaPath, "TextType", "string", "VariableNamingRule", "preserve"); + assert(height(qa) == 1, "qa_iclabel.csv should have 1 row, has %d", height(qa)); + assert(ismember(qa.status(1), ["ok", "ok_low_brain"]), ... + "qa row status=%s expected ok or ok_low_brain", qa.status(1)); + assert(qa.n_components(1) > 0, "n_components should be positive"); + perClassSum = qa.brain_count(1) + qa.muscle_count(1) + qa.eye_count(1) + ... + qa.heart_count(1) + qa.line_noise_count(1) + qa.channel_noise_count(1) + ... + qa.other_count(1); + % brain_count in QA reflects kept brain ICs (argmax == brain AND not flagged). + % ICs whose argmax is brain but brain prob < threshold are flagged AND not + % counted in brain_count; they fall through to "other_count"? No, they + % stay in their argmax class but are flagged. So perClassSum may be less + % than n_components by the count of argmax-brain-but-flagged ICs. + assert(perClassSum <= qa.n_components(1), ... + "per-class sum %d should not exceed n_components %d", ... + perClassSum, qa.n_components(1)); + assert(qa.brain_threshold(1) == 0.69, ... + "brain_threshold=%g expected 0.69", qa.brain_threshold(1)); + + params = jsondecode(fileread(paramsPath)); + requiredFields = ["BrainThreshold", "IcLabelVersion", "MinBrainIcs", ... + "AmicaDir", "OutDir", "Task", ... + "matlab_version", "eeglab_version", "git_sha", "timestamp_iso", ... + "n_subjects_input", "n_subjects_ok", "n_subjects_failed"]; + missingFields = setdiff(requiredFields, string(fieldnames(params))); + assert(isempty(missingFields), ... + "params.json missing fields: %s", strjoin(missingFields, ", ")); + assert(params.BrainThreshold == 0.69, "BrainThreshold should default to 0.69"); + + EEG = pop_loadset('filename', setFiles(1).name, 'filepath', setFiles(1).folder); + assert(isfield(EEG.etc, 'ic_classification'), ... + "EEG.etc.ic_classification missing"); + assert(isfield(EEG.etc.ic_classification, 'ICLabel'), ... + "EEG.etc.ic_classification.ICLabel missing"); + assert(size(EEG.etc.ic_classification.ICLabel.classifications, 1) == size(EEG.icaweights, 1), ... + "classifications row count must match IC count"); + assert(isfield(EEG.reject, 'gcompreject') && ... + numel(EEG.reject.gcompreject) == size(EEG.icaweights, 1), ... + "EEG.reject.gcompreject must be a vector of length n_components"); + + fprintf("test_phase3_smoke: OK (%d ICs, %d brain kept, %d flagged)\n", ... + size(EEG.icaweights, 1), qa.brain_count(1), sum(EEG.reject.gcompreject)); +end + +function rmdir_if_exists(p) + if isfolder(p) + rmdir(p, 's'); + end +end