-
Notifications
You must be signed in to change notification settings - Fork 832
Enabling of parallelization of analysis.hydrogenbonds.WaterBridgeAnalysis
#5151
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from 21 commits
de9cd4c
84c74d0
666aabf
946ec67
8db21b4
3b4812f
dd2e21b
6fab4c6
6708442
d6bdae9
e10772c
fca6ca2
844b237
d26ec16
7f06d51
a796920
412c682
49133c5
200843c
5871f5c
cc63480
07fff26
3303da5
4398e13
b1d32ee
504cee5
6dbee6c
e18cb91
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -716,7 +716,7 @@ def analysis(current, output, u, **kwargs): | |
| from MDAnalysis.lib.distances import calc_angles, capped_distance | ||
| from MDAnalysis.lib.NeighborSearch import AtomNeighborSearch | ||
|
|
||
| from ..base import AnalysisBase | ||
| from ..base import AnalysisBase, ResultsGroup | ||
|
|
||
| logger = logging.getLogger("MDAnalysis.analysis.WaterBridgeAnalysis") | ||
|
|
||
|
|
@@ -804,6 +804,16 @@ class WaterBridgeAnalysis(AnalysisBase): | |
| lambda: 1.5, N=1.31, O=1.31, P=1.58, S=1.55 # default value | ||
| ) # noqa: E741 | ||
|
|
||
| _analysis_algorithm_is_parallelizable = True | ||
|
|
||
| @classmethod | ||
| def get_supported_backends(cls): | ||
| return ( | ||
| "serial", | ||
| "multiprocessing", | ||
| "dask", | ||
| ) | ||
|
|
||
| def __init__( | ||
| self, | ||
| universe, | ||
|
|
@@ -1014,7 +1024,8 @@ def __init__( | |
| # final result accessed as self.results.network | ||
| self.results.network = [] | ||
| self.results.timeseries = None | ||
| self.timesteps = None # time for each frame | ||
| self.results.timesteps = None | ||
| self._timesteps = [] | ||
|
|
||
| self._log_parameters() | ||
|
|
||
|
|
@@ -1301,7 +1312,7 @@ def _prepare(self): | |
|
|
||
| self._update_selection() | ||
|
|
||
| self.timesteps = [] | ||
| self._timesteps = [] | ||
| if len(self._s1) and len(self._s2): | ||
| self._update_water_selection() | ||
| else: | ||
|
|
@@ -1395,8 +1406,37 @@ def _donor2acceptor(self, donors, h_donors, acceptor): | |
| ) | ||
| return result | ||
|
|
||
| def _iter_timesteps(self): | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This looks like a hack/band-aid. I believe you that it's necessary. Just make sure that you give as much information in the doc string/comments so that it's clear why it's here. Is this code tested?? |
||
| """Iterable timesteps aligned with results.network. | ||
|
|
||
| In parallel backends, aggregation can occasionally yield a 0-d object | ||
| array containing None (e.g. array(None, dtype=object)). This helper | ||
| normalizes such cases and falls back to frame indices. | ||
| """ | ||
| n = len(self.results.network) | ||
| ts = self.results.timesteps | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would call this variable something else — we use Just call it |
||
|
|
||
| if ts is None: | ||
| return range(n) | ||
|
|
||
| ts = np.asarray(ts) | ||
|
|
||
| # e.g. array(None, dtype=object) or scalar time | ||
| if ts.ndim == 0: | ||
| item = ts.item() | ||
| if item is None: | ||
| return range(n) | ||
| # if only one frame, accept scalar; otherwise fall back | ||
| return [item] if n == 1 else range(n) | ||
|
|
||
| # empty or mismatched length -> fall back | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this also related to scalars?? Document in doc string? |
||
| if ts.size != n: | ||
| return range(n) | ||
|
|
||
| return ts | ||
|
|
||
| def _single_frame(self): | ||
| self.timesteps.append(self._ts.time) | ||
| self._timesteps.append(self._ts.time) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This replicates standard |
||
| self.box = self.u.dimensions if self.pbc else None | ||
|
|
||
| if self.update_selection: | ||
|
|
@@ -1951,25 +1991,45 @@ def count_by_time(self, analysis_func=None, **kwargs): | |
| """ | ||
| if analysis_func is None: | ||
| analysis_func = self._count_by_time_analysis | ||
| if self.results.network: | ||
| result = [] | ||
| for time, frame in zip(self.timesteps, self.results.network): | ||
| result_dict = defaultdict(int) | ||
| self._traverse_water_network( | ||
| frame, | ||
| [], | ||
| analysis_func=analysis_func, | ||
| output=result_dict, | ||
| link_func=self._full_link, | ||
| **kwargs, | ||
| ) | ||
| result.append( | ||
| (time, sum([result_dict[key] for key in result_dict])) | ||
| ) | ||
| return result | ||
| else: | ||
|
|
||
| if not self.results.network: | ||
| return None | ||
|
|
||
| # Fallback when missing/empty/mismatched timesteps as missing | ||
| # happens when some parts end up contributing no timesteps | ||
| # Calculate frames and how many timesteps produced | ||
| n = len(self.results.network) | ||
| timesteps = self.results.timesteps | ||
|
|
||
| # Fallback if None are produced | ||
| if timesteps is None: | ||
| timesteps = range(n) | ||
| self.results.timesteps = np.asarray(list(timesteps), dtype=float) | ||
| else: | ||
| timesteps = np.asarray(timesteps) | ||
| # Check lenght for validation | ||
| if timesteps.ndim != 1 or timesteps.size != n: | ||
| timesteps = range(n) | ||
| self.results.timesteps = np.asarray( | ||
| list(timesteps), dtype=float | ||
| ) | ||
|
|
||
| result = [] | ||
| for time, frame in zip(timesteps, self.results.network): | ||
| result_dict = defaultdict(int) | ||
| self._traverse_water_network( | ||
| frame, | ||
| [], | ||
| analysis_func=analysis_func, | ||
| output=result_dict, | ||
| link_func=self._full_link, | ||
| **kwargs, | ||
| ) | ||
| result.append( | ||
| (time, sum([result_dict[key] for key in result_dict])) | ||
| ) | ||
| return result | ||
|
|
||
| def _timesteps_by_type_analysis(self, current, output, *args, **kwargs): | ||
| s1_index, to_index, s1, to_residue, dist, angle = ( | ||
| self._expand_timeseries(current[0]) | ||
|
|
@@ -2016,11 +2076,15 @@ def timesteps_by_type(self, analysis_func=None, **kwargs): | |
|
|
||
| if self.results.network: | ||
| result = defaultdict(list) | ||
| if self.timesteps is None: | ||
| timesteps = self.results.timesteps | ||
| if timesteps is None: | ||
| timesteps = range(len(self.results.network)) | ||
| else: | ||
| timesteps = self.timesteps | ||
| for time, frame in zip(timesteps, self.results.network): | ||
| if ( | ||
| isinstance(time, (float, np.floating)) | ||
| and float(time).is_integer() | ||
| ): | ||
| time = int(time) | ||
| self._traverse_water_network( | ||
| frame, | ||
| [], | ||
|
|
@@ -2120,7 +2184,11 @@ def generate_table(self, output_format=None): | |
| # standard array, like this: | ||
| out = np.empty((num_records,), dtype=dtype) | ||
| cursor = 0 # current row | ||
| for t, hframe in zip(self.timesteps, timeseries): | ||
| timesteps = self.results.timesteps | ||
| if timesteps is None: | ||
| timesteps = range(len(timeseries)) | ||
|
|
||
| for t, hframe in zip(timesteps, timeseries): | ||
| for ( | ||
| donor_index, | ||
| acceptor_index, | ||
|
|
@@ -2150,8 +2218,20 @@ def generate_table(self, output_format=None): | |
| return table | ||
|
|
||
| def _conclude(self): | ||
| # saving timesteps in results for parallelization | ||
| self.results.timesteps = np.asarray(self._timesteps) | ||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. remove empty line? |
||
| self.results.timeseries = self._generate_timeseries() | ||
|
|
||
| def _get_aggregator(self): | ||
| return ResultsGroup( | ||
| lookup={ | ||
| "timeseries": ResultsGroup.ndarray_hstack, | ||
| "timesteps": ResultsGroup.ndarray_hstack, | ||
| "network": ResultsGroup.ndarray_hstack, | ||
| } | ||
| ) | ||
|
|
||
| @property | ||
| def network(self): | ||
| wmsg = ( | ||
|
|
@@ -2171,3 +2251,12 @@ def timeseries(self): | |
| ) | ||
| warnings.warn(wmsg, DeprecationWarning) | ||
| return self.results.timeseries | ||
|
|
||
| @property | ||
| def timesteps(self): | ||
| wmsg = ( | ||
| "The `timesteps` attribute is deprecated and will be removed in " | ||
| "MDAnalysis 3.0.0. Please use `results.timesteps` instead." | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Recommend to use |
||
| ) | ||
| warnings.warn(wmsg, DeprecationWarning) | ||
| return self.results.timesteps | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,6 +15,9 @@ | |
| from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import ( | ||
| HydrogenBondAnalysis, | ||
| ) | ||
| from MDAnalysis.analysis.hydrogenbonds.wbridge_analysis import ( | ||
| WaterBridgeAnalysis, | ||
| ) | ||
| from MDAnalysis.analysis.nucleicacids import NucPairDist | ||
| from MDAnalysis.analysis.contacts import Contacts | ||
| from MDAnalysis.analysis.density import DensityAnalysis | ||
|
|
@@ -217,3 +220,11 @@ def client_InterRDF_s(request): | |
| @pytest.fixture(scope="module", params=params_for_cls(DistanceMatrix)) | ||
| def client_DistanceMatrix(request): | ||
| return request.param | ||
|
|
||
|
|
||
| # MDAnalysis.analysis.hydrogenbonds.wbridge_analysis | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. remove?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can remove, but would it not better to keep it for consistency with the other functions, that are also commented in that way like here:
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would keep for consistency. |
||
|
|
||
|
|
||
| @pytest.fixture(scope="module", params=params_for_cls(WaterBridgeAnalysis)) | ||
| def client_WaterBridgeAnalysis(request): | ||
| return request.param | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need
results.timestepsand_timesteps? How does this differ from the standardtimesthat come with AnalysisBase