diff --git a/modules/nf-core/huggingface/download/environment.yml b/modules/nf-core/huggingface/download/environment.yml new file mode 100644 index 00000000000..f2267b412f6 --- /dev/null +++ b/modules/nf-core/huggingface/download/environment.yml @@ -0,0 +1,6 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge +dependencies: + - conda-forge::huggingface_hub=1.6.0 diff --git a/modules/nf-core/huggingface/download/main.nf b/modules/nf-core/huggingface/download/main.nf new file mode 100644 index 00000000000..095a4d472e6 --- /dev/null +++ b/modules/nf-core/huggingface/download/main.nf @@ -0,0 +1,29 @@ +process HUGGINGFACE_DOWNLOAD { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "community.wave.seqera.io/library/huggingface_hub:1.6.0--c106a7f9664ca39b" + + input: + tuple val(meta), val(hf_repo), val(hf_file), val(hf_home) + + output: + tuple val(meta), path(hf_file), emit: output + tuple val("${task.process}"), val("huggingface_hub"), eval("hf --version 2>&1 | tail -n1 | awk '{print \$NF}'"), topic: versions, emit: versions_huggingface_hub + + when: + task.ext.when == null || task.ext.when + + script: + """ + export HF_HOME="${hf_home}" + export HF_HUB_CACHE=\$HF_HOME + hf download ${hf_repo} ${hf_file} --local-dir \$PWD + """ + + stub: + """ + touch ${hf_file} + """ +} diff --git a/modules/nf-core/huggingface/download/meta.yml b/modules/nf-core/huggingface/download/meta.yml new file mode 100644 index 00000000000..f961aa7458d --- /dev/null +++ b/modules/nf-core/huggingface/download/meta.yml @@ -0,0 +1,69 @@ +name: huggingface_download +description: Command-line interface for downloading models in GGUF format from Hugging Face Hub +keywords: + - gguf + - inference + - llama + - llm + - local-inference + - offline-llm +tools: + - huggingface_hub: + description: "Command-line interface for interacting with Hugging Face 
Hub, allowing to download, upload and interact with models and datasets" + homepage: "https://huggingface.co/docs/huggingface_hub/guides/cli" + licence: + - "MIT" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - hf_repo: + type: string + description: Hugging Face repository + - hf_file: + type: string + description: Hugging Face GGUF file + - hf_home: + type: string + description: Hugging Face default cache directory +output: + output: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - hf_file: + type: file + description: Downloaded Hugging Face GGUF file + ontologies: [] + versions_huggingface_hub: + - - ${task.process}: + type: string + description: The name of the process + - huggingface_hub: + type: string + description: The name of the tool + - hf --version 2>&1 | tail -n1 | awk '{print \$NF}': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - huggingface_hub: + type: string + description: The name of the tool + - hf --version 2>&1 | tail -n1 | awk '{print \$NF}': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@toniher" + - "@lucacozzuto" +maintainers: + - "@toniher" + - "@lucacozzuto" diff --git a/modules/nf-core/huggingface/download/tests/main.nf.test b/modules/nf-core/huggingface/download/tests/main.nf.test new file mode 100644 index 00000000000..74cb97ec5c2 --- /dev/null +++ b/modules/nf-core/huggingface/download/tests/main.nf.test @@ -0,0 +1,94 @@ +nextflow_process { + + name "Test Process HUGGINGFACE_DOWNLOAD" + script "../main.nf" + process "HUGGINGFACE_DOWNLOAD" + + tag "modules" + tag "modules_nfcore" + tag "huggingface" + tag "huggingface/download" + + test("download gguf file - gemma3") { + + when { + process { + 
""" + input[0] = [ + [ id:'test_model_gemma3' ], + "ggml-org/gemma-3-1b-it-GGUF", + "gemma-3-1b-it-Q4_K_M.gguf", + "./hf_cache" + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output.size(), + process.out.output[0][0], + file(process.out.output[0][1]).name, + file(process.out.output[0][1]).size() > 0, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + + test("download gguf file - smollm3") { + + when { + process { + """ + input[0] = [ + [ id:'test_model_smollm3' ], + "unsloth/SmolLM3-3B-GGUF", + "SmolLM3-3B-UD-IQ2_XXS.gguf", + "./hf_cache" + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output.size(), + process.out.output[0][0], + file(process.out.output[0][1]).name, + file(process.out.output[0][1]).size() > 0, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + + test("stub - download gguf file - gemma3") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test_model_gemma3' ], + "ggml-org/gemma-3-1b-it-GGUF", + "gemma-3-1b-it-Q4_K_M.gguf", + "./hf_cache" + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(sanitizeOutput(process.out)).match() } + ) + } + } +} diff --git a/modules/nf-core/huggingface/download/tests/main.nf.test.snap b/modules/nf-core/huggingface/download/tests/main.nf.test.snap new file mode 100644 index 00000000000..145bf736d33 --- /dev/null +++ b/modules/nf-core/huggingface/download/tests/main.nf.test.snap @@ -0,0 +1,76 @@ +{ + "stub - download gguf file - gemma3": { + "content": [ + { + "output": [ + [ + { + "id": "test_model_gemma3" + }, + "gemma-3-1b-it-Q4_K_M.gguf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_huggingface_hub": [ + [ + "HUGGINGFACE_DOWNLOAD", + "huggingface_hub", + "1.6.0" + ] + ] + } + ], + "timestamp": "2026-04-04T22:00:17.896195894", + "meta": { + 
"nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "download gguf file - gemma3": { + "content": [ + 1, + { + "id": "test_model_gemma3" + }, + "gemma-3-1b-it-Q4_K_M.gguf", + true, + { + "versions_huggingface_hub": [ + [ + "HUGGINGFACE_DOWNLOAD", + "huggingface_hub", + "1.6.0" + ] + ] + } + ], + "timestamp": "2026-04-09T16:48:14.073310733", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "download gguf file - smollm3": { + "content": [ + 1, + { + "id": "test_model_smollm3" + }, + "SmolLM3-3B-UD-IQ2_XXS.gguf", + true, + { + "versions_huggingface_hub": [ + [ + "HUGGINGFACE_DOWNLOAD", + "huggingface_hub", + "1.6.0" + ] + ] + } + ], + "timestamp": "2026-04-09T16:49:18.323499722", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/modules/nf-core/llamacpppython/run/Dockerfile b/modules/nf-core/llamacpppython/run/Dockerfile new file mode 100644 index 00000000000..c26428f1f92 --- /dev/null +++ b/modules/nf-core/llamacpppython/run/Dockerfile @@ -0,0 +1,5 @@ +FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 + +RUN apt-get update && apt-get install -y python3 python3-pip +RUN pip3 install llama-cpp-python==0.3.16 \ + --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124 diff --git a/modules/nf-core/llamacpppython/run/environment.yml b/modules/nf-core/llamacpppython/run/environment.yml new file mode 100644 index 00000000000..9f314201924 --- /dev/null +++ b/modules/nf-core/llamacpppython/run/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::llama-cpp-python=0.3.16 diff --git a/modules/nf-core/llamacpppython/run/main.nf b/modules/nf-core/llamacpppython/run/main.nf new file mode 100644 index 00000000000..0236c7cbeb9 --- /dev/null +++ b/modules/nf-core/llamacpppython/run/main.nf @@ -0,0 +1,38 @@ +process 
LLAMACPPPYTHON_RUN { + tag "${meta.id}" + label 'process_medium' + label 'process_gpu' + + conda "${moduleDir}/environment.yml" + container "${task.accelerator + ? 'quay.io/nf-core/llama-cpp-python:0.1.9' + : (workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'oras://community.wave.seqera.io/library/llama-cpp-python:0.3.16--d6f959a4c13960c4' + : 'community.wave.seqera.io/library/llama-cpp-python:0.3.16--b351398cd0ea7fc5')}" + + input: + tuple val(meta), path(prompt_file), path(gguf_model) + + output: + tuple val(meta), path("${prefix}.txt"), emit: output + path "versions.yml", emit: versions_llama_cpp_python, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + template('llama-cpp-python.py') + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + llama-cpp-python: \$(python3 -c 'import llama_cpp; print(llama_cpp.__version__)') + END_VERSIONS + """ +} diff --git a/modules/nf-core/llamacpppython/run/meta.yml b/modules/nf-core/llamacpppython/run/meta.yml new file mode 100644 index 00000000000..a97172b7ad1 --- /dev/null +++ b/modules/nf-core/llamacpppython/run/meta.yml @@ -0,0 +1,62 @@ +name: llamacpppython_run +description: Python wrapper for running locally-hosted LLM with llama.cpp +keywords: + - inference + - llama + - llm + - local-inference + - offline-llm +tools: + - llama-cpp-python: + description: "Python wrapper for llama.cpp LLM inference tool" + homepage: "https://llama-cpp-python.readthedocs.io/en/latest/" + licence: + - "MIT" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - prompt_file: + type: file + description: | + Prompt file + Structure: [ val(meta), path(prompt_file) ] + ontologies: [] + - gguf_model: + type: file + description: | + GGUF model + Structure: [ val(meta), path(gguf_model) ] + ontologies: [] +output: + output: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - ${prefix}.txt: + type: file + description: File with the output of LLM inference request + ontologies: [] + versions_llama_cpp_python: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +topics: + versions: + - versions.yml: + type: file + description: File containing software versions +authors: + - "@toniher" + - "@lucacozzuto" +maintainers: + - "@toniher" + - "@lucacozzuto" diff --git a/modules/nf-core/llamacpppython/run/templates/llama-cpp-python.py b/modules/nf-core/llamacpppython/run/templates/llama-cpp-python.py new file mode 100755 index 00000000000..94f892368df --- /dev/null +++ b/modules/nf-core/llamacpppython/run/templates/llama-cpp-python.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 + +import argparse +import json +import os +import shlex +import sys + +import llama_cpp + + +# Helper to create messages from a text file +def create_messages_from_textfile(textfile, system_prompt): + try: + with open(textfile, encoding="utf-8") as f: + content = f.read() + return [ + {"role": "system", "content": system_prompt.strip()}, + {"role": "user", "content": content.strip()}, + ] + except Exception as e: + print(f"Error reading text file '{textfile}': {e}", file=sys.stderr) + sys.exit(1) + + +# Helper to load messages from JSON or fallback to text +def load_messages(messages_file, system_prompt): + if not os.path.exists(messages_file): + print(f"Messages file '{messages_file}' does not exist.", file=sys.stderr) + sys.exit(1) + try: + with 
open(messages_file, encoding="utf-8") as f: + content = f.read() + try: + return json.loads(content) + except json.JSONDecodeError: + return create_messages_from_textfile(messages_file, system_prompt) + except Exception as e: + print(f"Error opening messages file '{messages_file}': {e}", file=sys.stderr) + sys.exit(1) + + +def llamacpp_python( + messages_file, + model_file, + temperature=0.9, + output="output.txt", + verbose=False, + context_size=2048, + chat_format="chatml", + seed=None, +): + if not os.path.exists(model_file): + print(f"Model file '{model_file}' does not exist.", file=sys.stderr) + sys.exit(1) + + # Default system prompt + system_prompt = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions" + + messages_json = load_messages(messages_file, system_prompt) + + try: + llm = llama_cpp.Llama( + model_path=model_file, + chat_format=chat_format, + n_ctx=context_size, + seed=seed, + ) + response = llm.create_chat_completion( + messages=messages_json, + response_format={"type": "json_object"}, + temperature=temperature, + ) + except Exception as e: + print(f"Error running llama_cpp: {e}", file=sys.stderr) + sys.exit(1) + + if not verbose: + try: + reply = response["choices"][0]["message"]["content"] + except (KeyError, IndexError, TypeError): + reply = response + # Try to parse reply as JSON if it's a string + if isinstance(reply, str): + try: + reply_json = json.loads(reply) + if isinstance(reply_json, dict) and len(reply_json) == 1: + reply = next(iter(reply_json.values())) + else: + reply = reply_json + except Exception: + pass # Leave reply as string if not valid JSON + elif isinstance(reply, dict) and len(reply) == 1: + reply = next(iter(reply.values())) + else: + reply = response + + try: + with open(output, "w", encoding="utf-8") as f: + if isinstance(reply, str): + f.write(reply) + else: + f.write(json.dumps(reply, indent=2)) + if verbose: + 
print(f"Output written to {output}") + except Exception as e: + print(f"Error writing output file '{output}': {e}", file=sys.stderr) + sys.exit(1) + + +def main(args_string=None): + parser = argparse.ArgumentParser(description="Submit a process with model.") + parser.add_argument("-s", "--messages", required=True, help="JSON message") + parser.add_argument("-m", "--model", required=True, help="Model used") + parser.add_argument("-t", "--temperature", default=0.9, type=float, help="Temperature") + parser.add_argument("-o", "--output", default="output.txt", help="Output text") + parser.add_argument("-c", "--context", default=2048, type=int, help="Context size") + parser.add_argument("--chat_format", default="chatml", help="Chat format") + parser.add_argument("--seed", default=None, type=int, help="Defined seed") + parser.add_argument("--verbose", action="store_true", help="Verbose output") + + args = parser.parse_args(shlex.split(args_string) if args_string is not None else None) + llamacpp_python( + messages_file=args.messages, + model_file=args.model, + temperature=args.temperature, + output=args.output, + verbose=args.verbose, + context_size=args.context, + chat_format=args.chat_format, + seed=args.seed, + ) + + +def write_versions(): + versions = {"${task.process}": {"llama-cpp-python": llama_cpp.__version__}} + with open("versions.yml", "w", encoding="utf-8") as f: + for process, pkgs in versions.items(): + f.write(f'"{process}":\\n') + for pkg, ver in pkgs.items(): + f.write(f" {pkg}: {ver}\\n") + + +if __name__ == "__main__": + main("--model ${gguf_model} --messages ${prompt_file} --output ${prefix}.txt ${args}") + write_versions() diff --git a/modules/nf-core/llamacpppython/run/tests/main.nf.test b/modules/nf-core/llamacpppython/run/tests/main.nf.test new file mode 100644 index 00000000000..9896bd3b705 --- /dev/null +++ b/modules/nf-core/llamacpppython/run/tests/main.nf.test @@ -0,0 +1,123 @@ +nextflow_process { + + name "Test Process LLAMACPPPYTHON_RUN" + 
script "../main.nf" + process "LLAMACPPPYTHON_RUN" + + tag "modules" + tag "modules_nfcore" + tag "llamacpppython" + tag "llamacpppython/run" + tag "huggingface/download" + + test("run inference with downloaded gguf model") { + + config "./nextflow.config" + + setup { + run("HUGGINGFACE_DOWNLOAD") { + script "../../../huggingface/download/main.nf" + process { + """ + input[0] = [ + [ id:'test_model' ], + "ggml-org/gemma-3-1b-it-GGUF", + "gemma-3-1b-it-Q4_K_M.gguf", + "./hf_cache" + ] + """ + } + } + } + + when { + params { + module_args = '--seed 42' + } + process { + """ + // Generate prompt.json with provided content + def promptFile = file('prompt.json') + promptFile.text = '''[ + { + "role": "system", + "content": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions" + }, + { + "role": "user", + "content": "Describe Barcelona in one paragraph" + } +] +''' + + input[0] = HUGGINGFACE_DOWNLOAD.out.output.map { meta, model -> + [ [ id:'test_run' ], promptFile, model ] + } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output.size(), + process.out.output[0][0], + file(process.out.output[0][1]).name, + file(process.out.output[0][1]).size() > 0, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + + test("stub - run inference with json prompt") { + + options "-stub" + + when { + params { + module_args = '--seed 42' + } + process { + """ + // Generate prompt.json with provided content + def promptFile = file('prompt.json') + promptFile.text = '''[ + { + "role": "system", + "content": "A chat between a curious user and an artificial intelligence assistant. 
The assistant gives helpful, detailed, and polite answers to the user's questions" + }, + { + "role": "user", + "content": "Describe Barcelona in one paragraph" + } +] +''' + + // Generate an empty stub_model.gguf file + def modelFile = file('stub_model.gguf') + modelFile.text = '' + + input[0] = [ + [ id:'test_run' ], + promptFile, + modelFile + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output.size(), + process.out.output[0][0], + file(process.out.output[0][1]).name, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } +} diff --git a/modules/nf-core/llamacpppython/run/tests/main.nf.test.snap b/modules/nf-core/llamacpppython/run/tests/main.nf.test.snap new file mode 100644 index 00000000000..a5e0744fb50 --- /dev/null +++ b/modules/nf-core/llamacpppython/run/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "run inference with downloaded gguf model": { + "content": [ + 1, + { + "id": "test_run" + }, + "test_run.txt", + true, + { + "versions_llama_cpp_python": [ + "versions.yml:md5,e4e5fd3eefb4e0b1e83d8766a8b52e7c" + ] + } + ], + "timestamp": "2026-04-14T22:59:52.532384543", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "stub - run inference with json prompt": { + "content": [ + 1, + { + "id": "test_run" + }, + "test_run.txt", + { + "versions_llama_cpp_python": [ + "versions.yml:md5,e4e5fd3eefb4e0b1e83d8766a8b52e7c" + ] + } + ], + "timestamp": "2026-04-14T22:59:56.61647329", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/modules/nf-core/llamacpppython/run/tests/nextflow.config b/modules/nf-core/llamacpppython/run/tests/nextflow.config new file mode 100644 index 00000000000..21556318f13 --- /dev/null +++ b/modules/nf-core/llamacpppython/run/tests/nextflow.config @@ -0,0 +1,7 @@ +nextflow.enable.moduleBinaries = true + +process { + withName: 'LLAMACPPPYTHON_RUN' { + ext.args = 
params.module_args + } +}