Skip to content
Open
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
de9cd4c
added parallelization to WaterBridgeAnalysis
talagayev Nov 20, 2025
84c74d0
adjusted the test files for WB
talagayev Nov 25, 2025
666aabf
black formatting
talagayev Nov 26, 2025
946ec67
removed files that were moved
talagayev Nov 26, 2025
8db21b4
black formatting
talagayev Nov 26, 2025
3b4812f
fixed case when empty results with parallelization
talagayev Nov 26, 2025
dd2e21b
remove unused imports
marinegor Nov 26, 2025
6fab4c6
got back StringIO case for showcase
talagayev Nov 27, 2025
6708442
Merge branch 'develop' into wb_parallel
talagayev Nov 27, 2025
d6bdae9
Update test_wbridge.py
talagayev Nov 27, 2025
e10772c
Update conftest.py
talagayev Nov 27, 2025
fca6ca2
Update test_wbridge.py
talagayev Nov 27, 2025
844b237
Merge branch 'MDAnalysis:develop' into wb_parallel
talagayev Feb 2, 2026
d26ec16
suggested PR adjustment
talagayev Feb 2, 2026
7f06d51
modified test
talagayev Feb 2, 2026
a796920
Merge branch 'wb_parallel' of https://github.com/talagayev/mdanalysis…
talagayev Feb 2, 2026
412c682
modified test
talagayev Feb 2, 2026
49133c5
black formatting
talagayev Feb 2, 2026
200843c
black formatting
talagayev Feb 2, 2026
5871f5c
Update datafiles.py
talagayev Feb 3, 2026
cc63480
comments + black formatting
talagayev Feb 4, 2026
07fff26
removal of teststeps and use of times
talagayev Feb 11, 2026
3303da5
black format
talagayev Feb 11, 2026
4398e13
adjusted tests
talagayev Feb 11, 2026
b1d32ee
adjusted code
talagayev Feb 11, 2026
504cee5
added versionchanged
talagayev Feb 12, 2026
6dbee6c
added CHANGELOG entry
talagayev Feb 12, 2026
e18cb91
Merge branch 'develop' into wb_parallel
talagayev Feb 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 114 additions & 25 deletions package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,7 @@ def analysis(current, output, u, **kwargs):
from MDAnalysis.lib.distances import calc_angles, capped_distance
from MDAnalysis.lib.NeighborSearch import AtomNeighborSearch

from ..base import AnalysisBase
from ..base import AnalysisBase, ResultsGroup

logger = logging.getLogger("MDAnalysis.analysis.WaterBridgeAnalysis")

Expand Down Expand Up @@ -804,6 +804,16 @@ class WaterBridgeAnalysis(AnalysisBase):
lambda: 1.5, N=1.31, O=1.31, P=1.58, S=1.55 # default value
) # noqa: E741

_analysis_algorithm_is_parallelizable = True

@classmethod
def get_supported_backends(cls):
    """Return the names of the supported execution backends as a tuple."""
    backends = ("serial", "multiprocessing", "dask")
    return backends

def __init__(
self,
universe,
Expand Down Expand Up @@ -1014,7 +1024,8 @@ def __init__(
# final result accessed as self.results.network
self.results.network = []
self.results.timeseries = None
self.timesteps = None # time for each frame
self.results.timesteps = None
self._timesteps = []
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need results.timesteps and _timesteps? How does this differ from the standard times that come with AnalysisBase?


self._log_parameters()

Expand Down Expand Up @@ -1301,7 +1312,7 @@ def _prepare(self):

self._update_selection()

self.timesteps = []
self._timesteps = []
if len(self._s1) and len(self._s2):
self._update_water_selection()
else:
Expand Down Expand Up @@ -1395,8 +1406,37 @@ def _donor2acceptor(self, donors, h_donors, acceptor):
)
return result

def _iter_timesteps(self):
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like a hack/band-aid. I believe you that it's necessary. Just make sure that you give as much information in the doc string/comments so that it's clear why it's here.

Is this code tested??

"""Iterable timesteps aligned with results.network.

In parallel backends, aggregation can occasionally yield a 0-d object
array containing None (e.g. array(None, dtype=object)). This helper
normalizes such cases and falls back to frame indices.
"""
n = len(self.results.network)
ts = self.results.timesteps
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would call this variable something else — we use ts everywhere in the code to mean "an instance of Timestep", and it's quite confusing when it doesn't mean that.

Just call it timesteps (which also includes the plural to indicate that this is a container).


if ts is None:
return range(n)

ts = np.asarray(ts)

# e.g. array(None, dtype=object) or scalar time
if ts.ndim == 0:
item = ts.item()
if item is None:
return range(n)
# if only one frame, accept scalar; otherwise fall back
return [item] if n == 1 else range(n)

# empty or mismatched length -> fall back
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this also related to scalars?? Document in doc string?

if ts.size != n:
return range(n)

return ts

def _single_frame(self):
self.timesteps.append(self._ts.time)
self._timesteps.append(self._ts.time)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This replicates standard AnalysisBase.times or am I missing something?

self.box = self.u.dimensions if self.pbc else None

if self.update_selection:
Expand Down Expand Up @@ -1951,25 +1991,45 @@ def count_by_time(self, analysis_func=None, **kwargs):
"""
if analysis_func is None:
analysis_func = self._count_by_time_analysis
if self.results.network:
result = []
for time, frame in zip(self.timesteps, self.results.network):
result_dict = defaultdict(int)
self._traverse_water_network(
frame,
[],
analysis_func=analysis_func,
output=result_dict,
link_func=self._full_link,
**kwargs,
)
result.append(
(time, sum([result_dict[key] for key in result_dict]))
)
return result
else:

if not self.results.network:
return None

# Fall back to frame indices when the timesteps are missing, empty, or
# mismatched in length, which happens when some parts end up
# contributing no timesteps.
# Count the frames so the number of timesteps can be validated against it.
n = len(self.results.network)
timesteps = self.results.timesteps

# Fallback if None are produced
if timesteps is None:
timesteps = range(n)
self.results.timesteps = np.asarray(list(timesteps), dtype=float)
else:
timesteps = np.asarray(timesteps)
# Check length for validation
if timesteps.ndim != 1 or timesteps.size != n:
timesteps = range(n)
self.results.timesteps = np.asarray(
list(timesteps), dtype=float
)

result = []
for time, frame in zip(timesteps, self.results.network):
result_dict = defaultdict(int)
self._traverse_water_network(
frame,
[],
analysis_func=analysis_func,
output=result_dict,
link_func=self._full_link,
**kwargs,
)
result.append(
(time, sum([result_dict[key] for key in result_dict]))
)
return result

def _timesteps_by_type_analysis(self, current, output, *args, **kwargs):
s1_index, to_index, s1, to_residue, dist, angle = (
self._expand_timeseries(current[0])
Expand Down Expand Up @@ -2016,11 +2076,15 @@ def timesteps_by_type(self, analysis_func=None, **kwargs):

if self.results.network:
result = defaultdict(list)
if self.timesteps is None:
timesteps = self.results.timesteps
if timesteps is None:
timesteps = range(len(self.results.network))
else:
timesteps = self.timesteps
for time, frame in zip(timesteps, self.results.network):
if (
isinstance(time, (float, np.floating))
and float(time).is_integer()
):
time = int(time)
self._traverse_water_network(
frame,
[],
Expand Down Expand Up @@ -2120,7 +2184,11 @@ def generate_table(self, output_format=None):
# standard array, like this:
out = np.empty((num_records,), dtype=dtype)
cursor = 0 # current row
for t, hframe in zip(self.timesteps, timeseries):
timesteps = self.results.timesteps
if timesteps is None:
timesteps = range(len(timeseries))

for t, hframe in zip(timesteps, timeseries):
for (
donor_index,
acceptor_index,
Expand Down Expand Up @@ -2150,8 +2218,20 @@ def generate_table(self, output_format=None):
return table

def _conclude(self):
# saving timesteps in results for parallelization
self.results.timesteps = np.asarray(self._timesteps)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove empty line?

self.results.timeseries = self._generate_timeseries()

def _get_aggregator(self):
    """Return the ``ResultsGroup`` that merges per-part results.

    Each key of ``self.results`` is mapped to the function used to combine
    the values produced by the individual parts of a split run.

    Returns
    -------
    ResultsGroup
        Aggregator with one merge function per result key.
    """
    return ResultsGroup(
        lookup={
            "timeseries": ResultsGroup.ndarray_hstack,
            "timesteps": ResultsGroup.ndarray_hstack,
            # ``results.network`` is created as a plain list and is tested
            # for truthiness by the analysis helpers. Stacking it into a
            # NumPy array would make that test raise for more than one
            # element, so concatenate the per-part lists instead.
            # NOTE(review): presumably every part's ``network`` is a list
            # of per-frame entries -- confirm against ``_single_frame``.
            "network": ResultsGroup.flatten_sequence,
        }
    )

@property
def network(self):
wmsg = (
Expand All @@ -2171,3 +2251,12 @@ def timeseries(self):
)
warnings.warn(wmsg, DeprecationWarning)
return self.results.timeseries

@property
def timesteps(self):
wmsg = (
"The `timesteps` attribute is deprecated and will be removed in "
"MDAnalysis 3.0.0. Please use `results.timesteps` instead."
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Recommend to use times (assuming it's equivalent)?

)
warnings.warn(wmsg, DeprecationWarning)
return self.results.timesteps
11 changes: 11 additions & 0 deletions testsuite/MDAnalysisTests/analysis/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import (
HydrogenBondAnalysis,
)
from MDAnalysis.analysis.hydrogenbonds.wbridge_analysis import (
WaterBridgeAnalysis,
)
from MDAnalysis.analysis.nucleicacids import NucPairDist
from MDAnalysis.analysis.contacts import Contacts
from MDAnalysis.analysis.density import DensityAnalysis
Expand Down Expand Up @@ -217,3 +220,11 @@ def client_InterRDF_s(request):
@pytest.fixture(scope="module", params=params_for_cls(DistanceMatrix))
def client_DistanceMatrix(request):
return request.param


# MDAnalysis.analysis.hydrogenbonds.wbridge_analysis
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can remove, but would it not be better to keep it for consistency with the other functions, which are also commented in that way, like here:

https://github.com/talagayev/mdanalysis/blob/5871f5c876bc873c19f9fc8c9694e40c1f2021a7/testsuite/MDAnalysisTests/analysis/conftest.py#L217-L222

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would keep for consistency.



@pytest.fixture(scope="module", params=params_for_cls(WaterBridgeAnalysis))
def client_WaterBridgeAnalysis(request):
    """Yield each parameter produced by ``params_for_cls(WaterBridgeAnalysis)``."""
    return request.param
Loading
Loading