Changes from 2 commits

30 commits
d97bd65
fix: add shutdown methods to executors and fix lithops memory leak
jbusecke Mar 12, 2026
4f4c061
Add tests and constrain fix only to lithops
jbusecke Mar 12, 2026
d925fb7
Clean up claudes horrible tests
jbusecke Mar 12, 2026
46908c1
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 12, 2026
4732cc3
Alternative approach via lithops config
jbusecke Mar 12, 2026
031ffac
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 12, 2026
6414546
toms renaming suggestion
jbusecke Mar 12, 2026
f013d74
Merge branch 'executor-cleaning' of https://github.com/zarr-developer…
jbusecke Mar 12, 2026
216fb09
Merge branch 'main' into executor-cleaning
TomNicholas Mar 12, 2026
6a83bf3
Merge branch 'main' into executor-cleaning
TomNicholas Mar 16, 2026
3f9aa44
Update lithops dependency version in pyproject.toml
jbusecke Mar 16, 2026
af2c83e
Revert lithops dependency version constraint
jbusecke Mar 16, 2026
4481f63
Merge branch 'main' into executor-cleaning
TomNicholas Mar 16, 2026
a612af5
Merge branch 'main' into executor-cleaning
TomNicholas Mar 16, 2026
3ff3bf1
Merge branch 'main' into executor-cleaning
jbusecke Mar 19, 2026
0cb839d
Mark Lithops executor tests as flaky
jbusecke Mar 19, 2026
05316e6
rerun flaky tests
jbusecke Mar 19, 2026
23d2654
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 19, 2026
748f50d
Fix LithopsEagerFunctionExecutor.shutdown() not clearing futures
jbusecke Mar 19, 2026
263656e
add comment
jbusecke Mar 19, 2026
b9ba3ca
Test against lithops fork with job_manager thread join fix
jbusecke Mar 19, 2026
63481f8
Add sleep between memory test iterations to allow background threads …
jbusecke Mar 19, 2026
68a60e9
Move sleep to after the loop to allow background threads to exit befo…
jbusecke Mar 19, 2026
ba61353
Increase sleep to 30s to give background threads more time to exit
jbusecke Mar 19, 2026
71a43b4
Revert lithops dep back to released version
jbusecke Mar 19, 2026
394c019
remove the memory growth test
jbusecke Mar 19, 2026
d1822a3
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 19, 2026
b95baa1
Merge branch 'main' into executor-cleaning
jbusecke Mar 19, 2026
d8c0ab1
Add .shutdown() method to custom executors
jbusecke Mar 19, 2026
887edac
satisfy linter
jbusecke Mar 19, 2026
15 changes: 15 additions & 0 deletions virtualizarr/parallel.py
@@ -1,3 +1,4 @@
import atexit
import inspect
import multiprocessing as mp
import warnings
@@ -372,4 +373,18 @@ def shutdown(self, wait: bool = True, *, cancel_futures: bool = False) -> None:
        wait
            Whether to wait for pending futures.
        """
        # Free cached results from lithops ResponseFuture objects before shutdown.
        # lithops.FunctionExecutor.futures is never cleared internally — each map()
        # call extends it with new ResponseFutures that cache deserialized results
        # in _call_output. Without this, memory accumulates across repeated calls.
        for f in self.lithops_client.futures:
            f._call_output = None
        self.lithops_client.futures.clear()
        self._futures.clear()

        # Lithops registers self.clean as an atexit handler (executors.py __init__),
        # which prevents the FunctionExecutor from ever being garbage collected.
        # Unregister it so the executor can be freed after shutdown.
        atexit.unregister(self.lithops_client.clean)
Member: This is absolutely wild and deserves raising upstream

Collaborator: Probably so.

Member: see #926


        self.lithops_client.__exit__(None, None, None)
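For context, both leaks fixed above are reproducible outside VirtualiZarr. First, a minimal sketch of the result-caching accumulation, assuming a locally configured lithops backend (standalone illustration, not VirtualiZarr code; futures and _call_output are the lithops internals named in the diff, not public API):

import lithops

fexec = lithops.FunctionExecutor()
for _ in range(100):
    futures = fexec.map(lambda x: x * 2, range(10))
    fexec.get_result(fs=futures)

# fexec.futures was never cleared: ~1000 ResponseFutures remain, each
# caching its deserialized result in _call_output, so memory grows with
# every map() call until the list is cleared manually.
print(len(fexec.futures))

Second, the atexit interaction: a registered bound method keeps its instance alive, which is why shutdown() unregisters self.lithops_client.clean before exiting. A self-contained demonstration of that Python behavior:

import atexit
import gc
import weakref

class Client:
    def clean(self):
        pass

c = Client()
atexit.register(c.clean)  # atexit holds the bound method, which holds c
ref = weakref.ref(c)
del c
gc.collect()
assert ref() is not None  # c is pinned by the atexit registration

atexit.unregister(ref().clean)  # an equal bound method removes the entry
gc.collect()
assert ref() is None  # c can now be garbage collected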
107 changes: 105 additions & 2 deletions virtualizarr/tests/test_parallel.py
@@ -1,9 +1,16 @@
+import gc
 import multiprocessing as mp
+import weakref
 
 import pytest
 
-from virtualizarr.parallel import LithopsEagerFunctionExecutor, get_executor
-from virtualizarr.tests import requires_lithops
+from virtualizarr.parallel import (
+    DaskDelayedExecutor,
+    LithopsEagerFunctionExecutor,
+    SerialExecutor,
+    get_executor,
+)
+from virtualizarr.tests import requires_dask, requires_lithops
 
 
 @requires_lithops
@@ -41,3 +48,99 @@ def test_get_executor_process_pool_mode():

    assert ctx is not None, "Expected executor to have a multiprocessing context"
    assert ctx.get_start_method() == "forkserver"


@requires_lithops
class TestLithopsExecutorShutdown:
    def test_shutdown_clears_lithops_client_futures(self):
        executor = LithopsEagerFunctionExecutor()
        executor.submit(lambda: 42)

        executor.shutdown()
        assert len(executor.lithops_client.futures) == 0

    def test_shutdown_clears_lithops_cached_results(self):
        """Verify that shutdown clears _call_output on lithops ResponseFutures."""
        with LithopsEagerFunctionExecutor() as executor:
            executor.map(lambda x: x * 2, (1, 2, 3))
            lithops_futures = list(executor.lithops_client.futures)
            assert len(lithops_futures) > 0

        # After shutdown, lithops futures list should be cleared...
        assert len(executor.lithops_client.futures) == 0
        # ...and each captured ResponseFuture should have had its cached
        # result freed, as the docstring promises.
        assert all(f._call_output is None for f in lithops_futures)


def _make_executor(executor_cls):
    """Create a pytest param for an executor class with appropriate marks."""
    marks = {
        "DaskDelayedExecutor": [requires_dask],
        "LithopsEagerFunctionExecutor": [requires_lithops],
    }
    return pytest.param(
        executor_cls,
        id=executor_cls.__name__,
        marks=marks.get(executor_cls.__name__, []),
    )


ALL_EXECUTORS = [
    _make_executor(SerialExecutor),
    _make_executor(DaskDelayedExecutor),
    _make_executor(LithopsEagerFunctionExecutor),
]


@pytest.mark.parametrize("executor_cls", ALL_EXECUTORS)
class TestExecutorMemory:
Member: I'm not sure if either of these tests will be reliable enough - curious of @chuckwondo's thoughts.

    def test_executor_does_not_leak_after_context_manager(self, executor_cls):
        """Executor and its futures should be GC-collectable after the with block."""

        with executor_cls() as executor:
            # Use map() since lithops call_async requires a data argument
            list(executor.map(lambda x: x * 2, range(5)))
        ref = weakref.ref(executor)

        # Drop the only local reference to the executor
        del executor
        gc.collect()

        assert ref() is None, (
            f"{executor_cls.__name__} was not garbage collected after shutdown"
        )

    def test_repeated_executor_use_does_not_grow_memory(self, executor_cls):
        """Memory should not grow when creating and destroying executors repeatedly."""
        import tracemalloc

        def _run_once():
            with executor_cls() as executor:
                # Use map() to produce non-trivial results
                return list(executor.map(lambda x: list(range(10_000)), range(5)))

        # Warm up (first run may allocate caches, import modules, etc.)
        _run_once()
        gc.collect()

        # Measure baseline: peak memory from a single run
        tracemalloc.start()
        _run_once()
        gc.collect()
        _, baseline_peak = tracemalloc.get_traced_memory()
        tracemalloc.stop()

        # Now run many iterations and check peak doesn't grow
        tracemalloc.start()
        n_iterations = 10
        for _ in range(n_iterations):
            _run_once()
            gc.collect()
        _, multi_peak = tracemalloc.get_traced_memory()
        tracemalloc.stop()

        # If memory leaks, peak will scale with n_iterations.
        # Allow 1.2x the single-run peak to account for GC timing jitter.
        assert multi_peak < 1.2 * baseline_peak, (
            f"{executor_cls.__name__} leaked memory: single run peak "
            f"{baseline_peak / 1024:.0f} KB, {n_iterations} runs peak "
            f"{multi_peak / 1024:.0f} KB"
        )
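The shutdown contract these tests enforce is easy to satisfy in a custom executor. A hypothetical minimal sketch (illustrative only; MinimalExecutor is not part of this PR, the real executor protocol lives in virtualizarr/parallel.py, and the shutdown signature is taken from the diff above):

from concurrent.futures import Future


class MinimalExecutor:
    """Eager, serial executor following the map/submit/shutdown pattern."""

    def __init__(self):
        self._futures: list[Future] = []

    def submit(self, fn, *args, **kwargs):
        future: Future = Future()
        future.set_result(fn(*args, **kwargs))  # run eagerly, inline
        self._futures.append(future)
        return future

    def map(self, fn, *iterables):
        return [self.submit(fn, *args).result() for args in zip(*iterables)]

    def shutdown(self, wait: bool = True, *, cancel_futures: bool = False) -> None:
        # Drop references to completed futures so cached results can be
        # garbage collected, mirroring the lithops cleanup in this PR.
        self._futures.clear()

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.shutdown()
        return None

Because shutdown() drops the only internal references to results, an instance of this sketch should pass both tests above: the weakref goes dead after the with block, and the tracemalloc peak stays flat across repeated runs.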