diff --git a/modules/nf-core/huggingface/download/environment.yml b/modules/nf-core/huggingface/download/environment.yml new file mode 100644 index 00000000000..f2267b412f6 --- /dev/null +++ b/modules/nf-core/huggingface/download/environment.yml @@ -0,0 +1,6 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge +dependencies: + - conda-forge::huggingface_hub=1.6.0 diff --git a/modules/nf-core/huggingface/download/main.nf b/modules/nf-core/huggingface/download/main.nf new file mode 100644 index 00000000000..095a4d472e6 --- /dev/null +++ b/modules/nf-core/huggingface/download/main.nf @@ -0,0 +1,29 @@ +process HUGGINGFACE_DOWNLOAD { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "community.wave.seqera.io/library/huggingface_hub:1.6.0--c106a7f9664ca39b" + + input: + tuple val(meta), val(hf_repo), val(hf_file), val(hf_home) + + output: + tuple val(meta), path(hf_file), emit: output + tuple val("${task.process}"), val("huggingface_hub"), eval("hf --version 2>&1 | tail -n1 | awk '{print \$NF}'"), topic: versions, emit: versions_huggingface_hub + + when: + task.ext.when == null || task.ext.when + + script: + """ + export HF_HOME="${hf_home}" + export HF_HUB_CACHE=\$HF_HOME + hf download ${hf_repo} ${hf_file} --local-dir \$PWD + """ + + stub: + """ + touch ${hf_file} + """ +} diff --git a/modules/nf-core/huggingface/download/meta.yml b/modules/nf-core/huggingface/download/meta.yml new file mode 100644 index 00000000000..f961aa7458d --- /dev/null +++ b/modules/nf-core/huggingface/download/meta.yml @@ -0,0 +1,69 @@ +name: huggingface_download +description: Command-line interface for downloading models in GGUF format from Hugging Face Hub +keywords: + - gguf + - inference + - llama + - llm + - local-inference + - offline-llm +tools: + - huggingface_hub: + description: "Command-line interface for interacting with Hugging Face 
Hub, allowing to download, upload and interact with models and datasets" + homepage: "https://huggingface.co/docs/huggingface_hub/guides/cli" + licence: + - "MIT" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - hf_repo: + type: string + description: Hugging Face repository + - hf_file: + type: string + description: Hugging Face GGUF file + - hf_home: + type: string + description: Hugging Face default cache directory +output: + output: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - hf_file: + type: file + description: Downloaded Hugging Face GGUF file + ontologies: [] + versions_huggingface_hub: + - - ${task.process}: + type: string + description: The name of the process + - huggingface_hub: + type: string + description: The name of the tool + - hf --version 2>&1 | tail -n1 | awk '{print \$NF}': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - huggingface_hub: + type: string + description: The name of the tool + - hf --version 2>&1 | tail -n1 | awk '{print \$NF}': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@toniher" + - "@lucacozzuto" +maintainers: + - "@toniher" + - "@lucacozzuto" diff --git a/modules/nf-core/huggingface/download/tests/main.nf.test b/modules/nf-core/huggingface/download/tests/main.nf.test new file mode 100644 index 00000000000..74cb97ec5c2 --- /dev/null +++ b/modules/nf-core/huggingface/download/tests/main.nf.test @@ -0,0 +1,94 @@ +nextflow_process { + + name "Test Process HUGGINGFACE_DOWNLOAD" + script "../main.nf" + process "HUGGINGFACE_DOWNLOAD" + + tag "modules" + tag "modules_nfcore" + tag "huggingface" + tag "huggingface/download" + + test("download gguf file - gemma3") { + + when { + process { + 
""" + input[0] = [ + [ id:'test_model_gemma3' ], + "ggml-org/gemma-3-1b-it-GGUF", + "gemma-3-1b-it-Q4_K_M.gguf", + "./hf_cache" + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output.size(), + process.out.output[0][0], + file(process.out.output[0][1]).name, + file(process.out.output[0][1]).size() > 0, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + + test("download gguf file - smollm3") { + + when { + process { + """ + input[0] = [ + [ id:'test_model_smollm3' ], + "unsloth/SmolLM3-3B-GGUF", + "SmolLM3-3B-UD-IQ2_XXS.gguf", + "./hf_cache" + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output.size(), + process.out.output[0][0], + file(process.out.output[0][1]).name, + file(process.out.output[0][1]).size() > 0, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + + test("stub - download gguf file - gemma3") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test_model_gemma3' ], + "ggml-org/gemma-3-1b-it-GGUF", + "gemma-3-1b-it-Q4_K_M.gguf", + "./hf_cache" + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(sanitizeOutput(process.out)).match() } + ) + } + } +} diff --git a/modules/nf-core/huggingface/download/tests/main.nf.test.snap b/modules/nf-core/huggingface/download/tests/main.nf.test.snap new file mode 100644 index 00000000000..145bf736d33 --- /dev/null +++ b/modules/nf-core/huggingface/download/tests/main.nf.test.snap @@ -0,0 +1,76 @@ +{ + "stub - download gguf file - gemma3": { + "content": [ + { + "output": [ + [ + { + "id": "test_model_gemma3" + }, + "gemma-3-1b-it-Q4_K_M.gguf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_huggingface_hub": [ + [ + "HUGGINGFACE_DOWNLOAD", + "huggingface_hub", + "1.6.0" + ] + ] + } + ], + "timestamp": "2026-04-04T22:00:17.896195894", + "meta": { + 
"nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "download gguf file - gemma3": { + "content": [ + 1, + { + "id": "test_model_gemma3" + }, + "gemma-3-1b-it-Q4_K_M.gguf", + true, + { + "versions_huggingface_hub": [ + [ + "HUGGINGFACE_DOWNLOAD", + "huggingface_hub", + "1.6.0" + ] + ] + } + ], + "timestamp": "2026-04-09T16:48:14.073310733", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "download gguf file - smollm3": { + "content": [ + 1, + { + "id": "test_model_smollm3" + }, + "SmolLM3-3B-UD-IQ2_XXS.gguf", + true, + { + "versions_huggingface_hub": [ + [ + "HUGGINGFACE_DOWNLOAD", + "huggingface_hub", + "1.6.0" + ] + ] + } + ], + "timestamp": "2026-04-09T16:49:18.323499722", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/modules/nf-core/llamacpppython/run/Dockerfile b/modules/nf-core/llamacpppython/run/Dockerfile new file mode 100644 index 00000000000..c26428f1f92 --- /dev/null +++ b/modules/nf-core/llamacpppython/run/Dockerfile @@ -0,0 +1,5 @@ +FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 + +RUN apt-get update && apt-get install -y python3 python3-pip +RUN pip3 install llama-cpp-python==0.3.16 \ + --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124 diff --git a/modules/nf-core/llamacpppython/run/environment.yml b/modules/nf-core/llamacpppython/run/environment.yml new file mode 100644 index 00000000000..9f314201924 --- /dev/null +++ b/modules/nf-core/llamacpppython/run/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::llama-cpp-python=0.3.16 diff --git a/modules/nf-core/llamacpppython/run/main.nf b/modules/nf-core/llamacpppython/run/main.nf new file mode 100644 index 00000000000..0236c7cbeb9 --- /dev/null +++ b/modules/nf-core/llamacpppython/run/main.nf @@ -0,0 +1,38 @@ +process 
LLAMACPPPYTHON_RUN { + tag "${meta.id}" + label 'process_medium' + label 'process_gpu' + + conda "${moduleDir}/environment.yml" + container "${task.accelerator + ? 'quay.io/nf-core/llama-cpp-python:0.1.9' + : (workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'oras://community.wave.seqera.io/library/llama-cpp-python:0.3.16--d6f959a4c13960c4' + : 'community.wave.seqera.io/library/llama-cpp-python:0.3.16--b351398cd0ea7fc5')}" + + input: + tuple val(meta), path(prompt_file), path(gguf_model) + + output: + tuple val(meta), path("${prefix}.txt"), emit: output + path "versions.yml", emit: versions_llama_cpp_python, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + template('llama-cpp-python.py') + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + llama-cpp-python: \$(python3 -c 'import llama_cpp; print(llama_cpp.__version__)') + END_VERSIONS + """ +} diff --git a/modules/nf-core/llamacpppython/run/meta.yml b/modules/nf-core/llamacpppython/run/meta.yml new file mode 100644 index 00000000000..a97172b7ad1 --- /dev/null +++ b/modules/nf-core/llamacpppython/run/meta.yml @@ -0,0 +1,62 @@ +name: llamacpppython_run +description: Python wrapper for running locally-hosted LLM with llama.cpp +keywords: + - inference + - llama + - llm + - local-inference + - offline-llm +tools: + - llama-cpp-python: + description: "Python wrapper for llama.cpp LLM inference tool" + homepage: "https://llama-cpp-python.readthedocs.io/en/latest/" + licence: + - "MIT" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - prompt_file: + type: file + description: | + Prompt file + Structure: [ val(meta), path(prompt_file) ] + ontologies: [] + - gguf_model: + type: file + description: | + GGUF model + Structure: [ val(meta), path(gguf_model) ] + ontologies: [] +output: + output: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - ${prefix}.txt: + type: file + description: File with the output of LLM inference request + ontologies: [] + versions_llama_cpp_python: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +topics: + versions: + - versions.yml: + type: file + description: File containing software versions +authors: + - "@toniher" + - "@lucacozzuto" +maintainers: + - "@toniher" + - "@lucacozzuto" diff --git a/modules/nf-core/llamacpppython/run/templates/llama-cpp-python.py b/modules/nf-core/llamacpppython/run/templates/llama-cpp-python.py new file mode 100755 index 00000000000..94f892368df --- /dev/null +++ b/modules/nf-core/llamacpppython/run/templates/llama-cpp-python.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 + +import argparse +import json +import os +import shlex +import sys + +import llama_cpp + + +# Helper to create messages from a text file +def create_messages_from_textfile(textfile, system_prompt): + try: + with open(textfile, encoding="utf-8") as f: + content = f.read() + return [ + {"role": "system", "content": system_prompt.strip()}, + {"role": "user", "content": content.strip()}, + ] + except Exception as e: + print(f"Error reading text file '{textfile}': {e}", file=sys.stderr) + sys.exit(1) + + +# Helper to load messages from JSON or fallback to text +def load_messages(messages_file, system_prompt): + if not os.path.exists(messages_file): + print(f"Messages file '{messages_file}' does not exist.", file=sys.stderr) + sys.exit(1) + try: + with 
open(messages_file, encoding="utf-8") as f: + content = f.read() + try: + return json.loads(content) + except json.JSONDecodeError: + return create_messages_from_textfile(messages_file, system_prompt) + except Exception as e: + print(f"Error opening messages file '{messages_file}': {e}", file=sys.stderr) + sys.exit(1) + + +def llamacpp_python( + messages_file, + model_file, + temperature=0.9, + output="output.txt", + verbose=False, + context_size=2048, + chat_format="chatml", + seed=None, +): + if not os.path.exists(model_file): + print(f"Model file '{model_file}' does not exist.", file=sys.stderr) + sys.exit(1) + + # Default system prompt + system_prompt = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions" + + messages_json = load_messages(messages_file, system_prompt) + + try: + llm = llama_cpp.Llama( + model_path=model_file, + chat_format=chat_format, + n_ctx=context_size, + seed=seed, + ) + response = llm.create_chat_completion( + messages=messages_json, + response_format={"type": "json_object"}, + temperature=temperature, + ) + except Exception as e: + print(f"Error running llama_cpp: {e}", file=sys.stderr) + sys.exit(1) + + if not verbose: + try: + reply = response["choices"][0]["message"]["content"] + except (KeyError, IndexError, TypeError): + reply = response + # Try to parse reply as JSON if it's a string + if isinstance(reply, str): + try: + reply_json = json.loads(reply) + if isinstance(reply_json, dict) and len(reply_json) == 1: + reply = next(iter(reply_json.values())) + else: + reply = reply_json + except Exception: + pass # Leave reply as string if not valid JSON + elif isinstance(reply, dict) and len(reply) == 1: + reply = next(iter(reply.values())) + else: + reply = response + + try: + with open(output, "w", encoding="utf-8") as f: + if isinstance(reply, str): + f.write(reply) + else: + f.write(json.dumps(reply, indent=2)) + if verbose: + 
print(f"Output written to {output}") + except Exception as e: + print(f"Error writing output file '{output}': {e}", file=sys.stderr) + sys.exit(1) + + +def main(args_string=None): + parser = argparse.ArgumentParser(description="Submit a process with model.") + parser.add_argument("-s", "--messages", required=True, help="JSON message") + parser.add_argument("-m", "--model", required=True, help="Model used") + parser.add_argument("-t", "--temperature", default=0.9, type=float, help="Temperature") + parser.add_argument("-o", "--output", default="output.txt", help="Output text") + parser.add_argument("-c", "--context", default=2048, type=int, help="Context size") + parser.add_argument("--chat_format", default="chatml", help="Chat format") + parser.add_argument("--seed", default=None, type=int, help="Defined seed") + parser.add_argument("--verbose", action="store_true", help="Verbose output") + + args = parser.parse_args(shlex.split(args_string) if args_string is not None else None) + llamacpp_python( + messages_file=args.messages, + model_file=args.model, + temperature=args.temperature, + output=args.output, + verbose=args.verbose, + context_size=args.context, + chat_format=args.chat_format, + seed=args.seed, + ) + + +def write_versions(): + versions = {"${task.process}": {"llama-cpp-python": llama_cpp.__version__}} + with open("versions.yml", "w", encoding="utf-8") as f: + for process, pkgs in versions.items(): + f.write(f'"{process}":\\n') + for pkg, ver in pkgs.items(): + f.write(f" {pkg}: {ver}\\n") + + +if __name__ == "__main__": + main("--model ${gguf_model} --messages ${prompt_file} --output ${prefix}.txt ${args}") + write_versions() diff --git a/modules/nf-core/llamacpppython/run/tests/main.nf.test b/modules/nf-core/llamacpppython/run/tests/main.nf.test new file mode 100644 index 00000000000..9896bd3b705 --- /dev/null +++ b/modules/nf-core/llamacpppython/run/tests/main.nf.test @@ -0,0 +1,123 @@ +nextflow_process { + + name "Test Process LLAMACPPPYTHON_RUN" + 
script "../main.nf" + process "LLAMACPPPYTHON_RUN" + + tag "modules" + tag "modules_nfcore" + tag "llamacpppython" + tag "llamacpppython/run" + tag "huggingface/download" + + test("run inference with downloaded gguf model") { + + config "./nextflow.config" + + setup { + run("HUGGINGFACE_DOWNLOAD") { + script "../../../huggingface/download/main.nf" + process { + """ + input[0] = [ + [ id:'test_model' ], + "ggml-org/gemma-3-1b-it-GGUF", + "gemma-3-1b-it-Q4_K_M.gguf", + "./hf_cache" + ] + """ + } + } + } + + when { + params { + module_args = '--seed 42' + } + process { + """ + // Generate prompt.json with provided content + def promptFile = file('prompt.json') + promptFile.text = '''[ + { + "role": "system", + "content": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions" + }, + { + "role": "user", + "content": "Describe Barcelona in one paragraph" + } +] +''' + + input[0] = HUGGINGFACE_DOWNLOAD.out.output.map { meta, model -> + [ [ id:'test_run' ], promptFile, model ] + } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output.size(), + process.out.output[0][0], + file(process.out.output[0][1]).name, + file(process.out.output[0][1]).size() > 0, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + + test("stub - run inference with json prompt") { + + options "-stub" + + when { + params { + module_args = '--seed 42' + } + process { + """ + // Generate prompt.json with provided content + def promptFile = file('prompt.json') + promptFile.text = '''[ + { + "role": "system", + "content": "A chat between a curious user and an artificial intelligence assistant. 
The assistant gives helpful, detailed, and polite answers to the user's questions" + }, + { + "role": "user", + "content": "Describe Barcelona in one paragraph" + } +] +''' + + // Generate an empty stub_model.gguf file + def modelFile = file('stub_model.gguf') + modelFile.text = '' + + input[0] = [ + [ id:'test_run' ], + promptFile, + modelFile + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output.size(), + process.out.output[0][0], + file(process.out.output[0][1]).name, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } +} diff --git a/modules/nf-core/llamacpppython/run/tests/main.nf.test.snap b/modules/nf-core/llamacpppython/run/tests/main.nf.test.snap new file mode 100644 index 00000000000..a5e0744fb50 --- /dev/null +++ b/modules/nf-core/llamacpppython/run/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "run inference with downloaded gguf model": { + "content": [ + 1, + { + "id": "test_run" + }, + "test_run.txt", + true, + { + "versions_llama_cpp_python": [ + "versions.yml:md5,e4e5fd3eefb4e0b1e83d8766a8b52e7c" + ] + } + ], + "timestamp": "2026-04-14T22:59:52.532384543", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "stub - run inference with json prompt": { + "content": [ + 1, + { + "id": "test_run" + }, + "test_run.txt", + { + "versions_llama_cpp_python": [ + "versions.yml:md5,e4e5fd3eefb4e0b1e83d8766a8b52e7c" + ] + } + ], + "timestamp": "2026-04-14T22:59:56.61647329", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/modules/nf-core/llamacpppython/run/tests/nextflow.config b/modules/nf-core/llamacpppython/run/tests/nextflow.config new file mode 100644 index 00000000000..21556318f13 --- /dev/null +++ b/modules/nf-core/llamacpppython/run/tests/nextflow.config @@ -0,0 +1,7 @@ +nextflow.enable.moduleBinaries = true + +process { + withName: 'LLAMACPPPYTHON_RUN' { + ext.args = 
params.module_args + } +}