Skip to content
36 changes: 36 additions & 0 deletions modules/local/cluster_metrics.nf
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A "local" module cannot be added to an nf-core subworkflow :) The module first needs to be contributed following the nf-core module guidelines; after that it can be used in the subworkflow!

Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
 * CLUSTER_METRICS
 *
 * Runs the supplied cluster-metrics Python script on the PCA scores and the
 * cluster assignments of one sample, sweeping k between params.k_min and
 * params.k_max (defaults 2 and 12).
 *
 * Inputs:
 *   - tuple(meta, pca_scores, pca_info, clusters); pca_info is staged into the
 *     work directory but is not referenced on the command line.
 *   - cluster_metrics_script: the Python script to execute.
 *
 * Outputs (all named after meta.id): metrics TSV, k-sweep CSV, selected JSON,
 * plus versions.yml reporting the scikit-learn version.
 */
process CLUSTER_METRICS {
    tag "${meta.id}"
    label 'process_medium'
    container "${params.python_container ?: params.container_py ?: 'snpclustering-py:latest'}"
    publishDir "${params.outdir}/metrics", mode: 'copy'

    input:
    tuple val(meta), path(pca_scores), path(pca_info), path(clusters)
    path cluster_metrics_script

    output:
    // NOTE(review): no CLI flag names this file; presumably the script derives
    // it from --out-prefix -- confirm against the script.
    tuple val(meta), path("${meta.id}_metrics.tsv"),   emit: metrics
    tuple val(meta), path("${meta.id}_k_sweep.csv"),   emit: k_sweep
    tuple val(meta), path("${meta.id}_selected.json"), emit: selected
    path "versions.yml",                               emit: versions

    script:
    // Bounds of the k sweep, falling back to 2..12 when the params are unset.
    def sweep_lo  = params.k_min ?: 2
    def sweep_hi  = params.k_max ?: 12
    // Every output file name is built from the sample id.
    def sample_id = meta.id
    """
    python3 ${cluster_metrics_script} \\
        --features ${pca_scores} \\
        --clusters ${clusters} \\
        --k-min ${sweep_lo} \\
        --k-max ${sweep_hi} \\
        --out-k-sweep ${sample_id}_k_sweep.csv \\
        --out-selected ${sample_id}_selected.json \\
        --out-prefix ${sample_id}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        scikit-learn: \$(python3 -c "import sklearn; print(sklearn.__version__)")
    END_VERSIONS
    """
}
47 changes: 47 additions & 0 deletions modules/local/cluster_viz.nf
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See above comment

Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
 * CLUSTER_VIZ
 *
 * Runs the supplied visualisation Python script on the PCA scores and cluster
 * assignments of one sample and collects UMAP / t-SNE / PCA plots together
 * with the UMAP and t-SNE coordinate tables.
 *
 * Inputs:
 *   - tuple(meta, pca_scores, pca_info, clusters); pca_info is staged but not
 *     referenced on the command line. pca_scores is passed to both --features
 *     and --pca-scores.
 *   - cluster_viz_script: the Python script to execute.
 *
 * Tunables (with defaults): params.viz_umap_neighbors (15),
 * params.viz_umap_min_dist (0.1), params.viz_perplexity (30),
 * params.viz_tsne_iter (1000).
 */
process CLUSTER_VIZ {
    tag "${meta.id}"
    label 'process_medium'
    container "${params.python_container ?: params.container_py ?: 'snpclustering-py:latest'}"
    publishDir "${params.outdir}/viz", mode: 'copy'

    input:
    tuple val(meta), path(pca_scores), path(pca_info), path(clusters)
    path cluster_viz_script

    output:
    tuple val(meta), path("${meta.id}_umap.png"),
                     path("${meta.id}_tsne.png"),
                     path("${meta.id}_pca.png"), emit: plots
    tuple val(meta), path("${meta.id}_umap.tsv"), emit: umap_tsv
    tuple val(meta), path("${meta.id}_tsne.tsv"), emit: tsne_tsv
    path "versions.yml", emit: versions

    script:
    def umap_n = params.viz_umap_neighbors ?: 15
    // Explicit null check instead of the Elvis operator: Groovy treats the
    // number 0 as false, so `?:` would silently replace a requested
    // min_dist of 0 / 0.0 with the default.
    def umap_d = params.viz_umap_min_dist != null ? params.viz_umap_min_dist : 0.1
    def tsne_p = params.viz_perplexity ?: 30
    def tsne_i = params.viz_tsne_iter ?: 1000

    // NUMBA_DISABLE_JIT=1 is exported before the script runs.
    // NOTE(review): presumably to avoid numba JIT compilation/caching problems
    // inside the container -- confirm the intent.
    """
    export NUMBA_DISABLE_JIT=1

    python3 ${cluster_viz_script} \\
        --features ${pca_scores} \\
        --clusters ${clusters} \\
        --pca-scores ${pca_scores} \\
        --umap-neighbors ${umap_n} \\
        --umap-min-dist ${umap_d} \\
        --tsne-perplexity ${tsne_p} \\
        --tsne-iter ${tsne_i} \\
        --out-umap-tsv ${meta.id}_umap.tsv \\
        --out-tsne-tsv ${meta.id}_tsne.tsv \\
        --out-umap-png ${meta.id}_umap.png \\
        --out-tsne-png ${meta.id}_tsne.png \\
        --out-pca-png ${meta.id}_pca.png

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        scikit-learn: \$(python3 -c "import sklearn; print(sklearn.__version__)")
    END_VERSIONS
    """
}
39 changes: 39 additions & 0 deletions modules/local/clustering.nf
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See above comment

Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
 * CLUSTERING
 *
 * Runs the supplied clustering Python script on the PCA scores of one sample.
 *
 * Inputs:
 *   - tuple(meta, pca_scores, pca_info); pca_info is staged but not referenced
 *     on the command line.
 *   - clustering_script: the Python script to execute.
 *
 * Outputs: clusters.csv, clustering_info.json and versions.yml.
 *
 * Parameters:
 *   - params.algorithm (default 'kmeans'). For 'kmeans', params.n_clusters (3),
 *     params.n_init (100) and params.init_method ('random') are forwarded.
 *     For 'dbscan', params.dbscan_eps and params.dbscan_min_samples are
 *     required. Any other value is passed through with no extra arguments.
 */
process CLUSTERING {
    tag "${meta.id}"
    label 'process_medium'
    // Same fallback chain as the other Python-based processes, so an unset
    // params.python_container no longer renders the literal string "null".
    container "${params.python_container ?: params.container_py ?: 'snpclustering-py:latest'}"
    publishDir "${params.outdir}/clustering", mode: 'copy'

    input:
    tuple val(meta), path(pca_scores), path(pca_info)
    path clustering_script

    output:
    tuple val(meta), path('clusters.csv'), emit: clusters
    tuple val(meta), path('clustering_info.json'), emit: clustering_info
    // Appended last so existing positional/named outputs are unaffected.
    path "versions.yml", emit: versions

    script:
    def algorithm = params.algorithm ?: 'kmeans'
    def n_init = params.n_init ?: 100
    def init_meth = params.init_method ?: 'random'

    def extra_args = ''
    if( algorithm == 'kmeans' ) {
        def k = params.n_clusters ?: 3
        // NOTE(review): --n_init uses an underscore while every other flag is
        // hyphenated; verify it matches the script's argument parser.
        extra_args = "--k ${k} --n_init ${n_init} --init-method ${init_meth}"
    }
    else if( algorithm == 'dbscan' ) {
        def eps = params.dbscan_eps
        def mins = params.dbscan_min_samples
        // Fail early with a clear message instead of passing the literal
        // string "null" to the script. Compared against null (not truthiness)
        // so that numeric zero is still forwarded as given.
        if( eps == null || mins == null ) {
            error "CLUSTERING: algorithm 'dbscan' requires both params.dbscan_eps and params.dbscan_min_samples to be set"
        }
        extra_args = "--dbscan-eps ${eps} --dbscan-min-samples ${mins}"
    }

    """
    python3 ${clustering_script} \\
        --features ${pca_scores} \\
        --algorithm ${algorithm} \\
        ${extra_args} \\
        --out-clusters clusters.csv \\
        --out-info clustering_info.json

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        scikit-learn: \$(python3 -c "import sklearn; print(sklearn.__version__)")
    END_VERSIONS
    """
}
63 changes: 63 additions & 0 deletions modules/local/pca.nf
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See above comment

Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
 * PCA_FLASHPCA
 *
 * Runs flashpca on a PLINK bed/bim/fam fileset and converts its raw output
 * with the supplied parser script.
 *
 * Inputs:
 *   - tuple(meta, bed, bim, fam); the fileset prefix handed to flashpca is the
 *     base name of the .bed file.
 *   - parser_script: Python script that turns outpc.raw into
 *     pca_scores.tsv and pca_info.json.
 *
 * Outputs:
 *   - pca: tuple(meta, features.tsv, scaled.tsv, pca_scores.tsv, pca_info.json)
 *       features.tsv -- second column of the .bim file
 *       scaled.tsv   -- flashpca --outmeansd output
 *   - versions: versions.yml with the flashpca and python3 versions.
 *
 * Parameters: params.threads (cpus), params.n_pcs (default 40),
 * params.flashpca_bin (default 'flashpca'), params.flashpca_container.
 */
process PCA_FLASHPCA {
    tag "${meta.id}"
    cpus { params.threads as int }
    container params.flashpca_container
    publishDir "${params.outdir}/04_pca", mode: 'copy'

    input:
    tuple val(meta), path(bed), path(bim), path(fam)
    path parser_script

    output:
    tuple val(meta), path('features.tsv'), path('scaled.tsv'), path('pca_scores.tsv'), path('pca_info.json'), emit: pca
    path 'versions.yml', emit: versions

    script:
    def flashpca_exe   = params.flashpca_bin ?: 'flashpca'
    // Number of principal components, shared by flashpca and the parser.
    def n_components   = params.n_pcs ?: 40
    def fileset_prefix = bed.baseName
    """
    set -euo pipefail

    command -v ${flashpca_exe} >/dev/null 2>&1 || { echo "ERROR: '${flashpca_exe}' not found in PATH" >&2; exit 127; }
    command -v python3 >/dev/null 2>&1 || { echo "ERROR: 'python3' not found" >&2; exit 127; }

    prefix="${fileset_prefix}"

    ${flashpca_exe} \\
        --bfile "\$prefix" \\
        --ndim ${n_components} \\
        --numthreads ${task.cpus} \\
        --outpc outpc.raw \\
        --outmeansd scaled.tsv

    awk '{print \$2}' "${bim}" > features.tsv

    python3 ${parser_script} \\
        --outpc outpc.raw \\
        --n-pcs ${n_components} \\
        --out-pca pca_scores.tsv \\
        --out-info pca_info.json \\
        --id-mode fid_iid

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        flashpca: \$(${flashpca_exe} --version 2>&1 | head -n 1 || true)
        python3: \$(python3 --version 2>&1)
    END_VERSIONS
    """
}

/*
 * pca_ch
 *
 * Thin wrapper around PCA_FLASHPCA: resolves the flashpca output parser script
 * from the project directory (failing if the file does not exist) and runs the
 * process on every element of plink_ch.
 *
 * take: plink_ch -- channel passed as the first input of PCA_FLASHPCA.
 * emit: pca      -- PCA_FLASHPCA.out.pca
 *       versions -- PCA_FLASHPCA.out.versions
 */
workflow pca_ch {
    take:
    plink_ch

    main:
    // Location of the parser script inside the project tree.
    parser_script_path = "${projectDir}/subworkflows/nf-core/snpclustering/scripts/flashpca_outpc_to_tsv.py"
    // Value channel so the same script file is reused for every task.
    parser_script_ch = Channel.value( file(parser_script_path, checkIfExists: true) )

    PCA_FLASHPCA(plink_ch, parser_script_ch)

    emit:
    pca      = PCA_FLASHPCA.out.pca
    versions = PCA_FLASHPCA.out.versions
}
38 changes: 38 additions & 0 deletions modules/local/plink2_pgen2bed.nf
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See above comment

You already reuse plink/vcf, it might be worth building (or reusing if it exists) the plink2/pgen2bed module. I would ask you to do that in a separate PR to keep the review load small :)

Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
 * PLINK2_PGEN2BED
 *
 * Converts a PLINK 2 pgen/pvar/psam fileset to a PLINK 1 bed/bim/fam fileset
 * with `plink2 --make-bed`, keeping only variants with at most two alleles
 * (`--max-alleles 2`).
 *
 * Inputs:  tuple(meta, pgen, pvar, psam)
 * Outputs: bed / bim / fam tuples (files named "<prefix>_bed.*") and
 *          versions.yml with the plink2 version.
 *
 * The output prefix is task.ext.prefix when set, otherwise meta.id.
 */
process PLINK2_PGEN2BED {
    tag "${meta.id}"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    // Falls back to the biocontainers image carrying the same tag as the
    // Singularity image when params.plink2_container is unset, so the
    // container name never renders as the literal string "null".
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/plink2:2.00a5.10--h4ac6f70_0' :
        (params.plink2_container ?: 'quay.io/biocontainers/plink2:2.00a5.10--h4ac6f70_0') }"

    input:
    tuple val(meta), path(pgen), path(pvar), path(psam)

    output:
    tuple val(meta), path("*.bed"), emit: bed
    tuple val(meta), path("*.bim"), emit: bim
    tuple val(meta), path("*.fam"), emit: fam
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def prefix = task.ext.prefix ?: meta.id

    // The staged input files are passed explicitly. Deriving the input fileset
    // name from the output prefix (as `--pfile ${prefix}` did) only works when
    // the inputs happen to be named "<meta.id>.pgen/.pvar/.psam" and breaks as
    // soon as task.ext.prefix is customised.
    """
    plink2 \\
        --pgen ${pgen} \\
        --pvar ${pvar} \\
        --psam ${psam} \\
        --max-alleles 2 \\
        --make-bed \\
        --threads ${task.cpus} \\
        --out ${prefix}_bed

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        plink2: \$(plink2 --version 2>&1 | sed 's/^PLINK v//; s/ 64.*\$//')
    END_VERSIONS
    """
}
Loading
Loading