From 8d4a24775dd3fed1fd16b6c8cc50080538a845b2 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Wed, 1 Apr 2026 15:06:31 +0200 Subject: [PATCH] fix(cache): improve NFS/lock error handling in DefaultCacheStore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three improvements to DefaultCacheStore.openDb(): 1. Log root cause: the underlying LevelDB exception is now logged at DEBUG level so it appears in .nextflow.log, aiding diagnosis. 2. Add NXF_CACHE_DIR env-var support: the new resolveCacheBaseDir() helper checks for the NXF_CACHE_DIR environment variable and uses it as the base directory for the LevelDB cache. This lets users on NFS / Lustre / GPFS clusters redirect only the cache DB to a local path (e.g. /tmp) without having to move the entire pipeline launch directory. 3. Improve error message: the previous message incorrectly advised users to use '-w' (which controls the work directory, not the cache directory). The new message accurately describes the root cause and documents both remedies: running from a local directory with -w, or setting NXF_CACHE_DIR. Fixes: DefaultCacheStore caught a generic Exception from LevelDB when the launch directory is on a filesystem without POSIX file-lock support (NFS, Lustre, GPFS, BeeGFS, …) and emitted a misleading error message. Signed-off-by: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> --- .../nextflow/cache/DefaultCacheStore.groovy | 37 +++++++++++++++++-- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/cache/DefaultCacheStore.groovy b/modules/nextflow/src/main/groovy/nextflow/cache/DefaultCacheStore.groovy index 9adea464a2..c8ea9dfd0e 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cache/DefaultCacheStore.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cache/DefaultCacheStore.groovy @@ -20,6 +20,7 @@ import java.nio.file.Path import com.google.common.hash.HashCode import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j import nextflow.Const import nextflow.exception.AbortOperationException import nextflow.util.CacheHelper @@ -32,6 +33,7 @@ import org.iq80.leveldb.impl.Iq80DBFactory * * @author Paolo Di Tommaso */ +@Slf4j @CompileStatic class DefaultCacheStore implements CacheStore { @@ -63,11 +65,31 @@ class DefaultCacheStore implements CacheStore { this.KEY_SIZE = CacheHelper.hasher('x').hash().asBytes().size() this.uniqueId = uniqueId this.runName = runName - this.baseDir = home ?: Const.appCacheDir.toAbsolutePath() + this.baseDir = home ?: resolveCacheBaseDir() this.dataDir = baseDir.resolve("cache/$uniqueId") this.indexFile = dataDir.resolve("index.$runName") } + /** + * Resolve the base directory for the cache DB. + * + * The {@code NXF_CACHE_DIR} environment variable can be used to redirect + * the LevelDB cache to a local filesystem that supports file locking + * (e.g. {@code export NXF_CACHE_DIR=/tmp/nxf-cache}) when the pipeline + * launch directory resides on a network filesystem such as NFS or Lustre. + * + * When {@code NXF_CACHE_DIR} is not set the default ({@code .nextflow/} + * relative to the launch directory) is used. + */ + private static Path resolveCacheBaseDir() { + final override = System.getenv('NXF_CACHE_DIR') + if( override ) { + log.debug "Using NXF_CACHE_DIR for cache base directory: $override" + return Path.of(override).toAbsolutePath() + } + return Const.appCacheDir.toAbsolutePath() + } + private void openDb() { // make sure the db path exists dataDir.mkdirs() @@ -90,11 +112,18 @@ class DefaultCacheStore implements CacheStore { throw new IOException(msg) } else { + // Log the underlying cause so it is visible in .nextflow.log for diagnosis. + log.debug "Failed to open LevelDB cache at path: $file -- cause: ${e.message}", e msg = "Can't open cache DB: $file" msg += '\n\n' - msg += "Nextflow needs to be executed in a shared file system that supports file locks.\n" - msg += "Alternatively, you can run it in a local directory and specify the shared work\n" - msg += "directory by using the `-w` command line option." + msg += "The Nextflow cache DB is located on a filesystem that does not support file locking.\n" + msg += "This is common when the pipeline is launched from a network filesystem (NFS, Lustre, GPFS, …).\n" + msg += "\nTo fix this, choose one of the following options:\n" + msg += " 1. Run Nextflow from a local directory and point the work directory to your shared\n" + msg += " filesystem using the `-w` command line option.\n" + msg += " 2. Set the NXF_CACHE_DIR environment variable to a local path so that only the\n" + msg += " cache DB is redirected to a lock-capable filesystem, e.g.:\n" + msg += " export NXF_CACHE_DIR=/tmp/nxf-cache-\$USER" throw new IOException(msg, e) } }