-
Notifications
You must be signed in to change notification settings - Fork 11
Reduce complexity of CriticalPathQueuingDurationDataProvider #194
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
cdb011d
5aac8c1
ae84789
9868671
89291c0
66ff455
06a4e6a
4f6fab5
4acc238
5258cc3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -40,6 +40,7 @@ public class ProfileThread { | |||||
|
|
||||||
| @Nullable private String name; | ||||||
| @Nullable private Integer sortIndex; | ||||||
| private boolean sorted; | ||||||
|
|
||||||
| private final List<JsonObject> extraMetadata; | ||||||
| private final List<JsonObject> extraEvents; | ||||||
|
|
@@ -111,8 +112,14 @@ public Integer getSortIndex() { | |||||
| /** | ||||||
| * Parses a {@link JsonObject} as a tracing event and adds it to this thread. Returns {@code true} | ||||||
| * if parsing and adding the event was successful and {@code false} otherwise. | ||||||
| * | ||||||
| * @throws IllegalStateException if the thread has already been sorted, and as such cannot be | ||||||
| * modified anymore. | ||||||
| */ | ||||||
| public boolean addEvent(JsonObject event) { | ||||||
| if (sorted) { | ||||||
| throw new IllegalStateException("Cannot add event, Bazel profile thread has been sorted!"); | ||||||
| } | ||||||
| try { | ||||||
| switch (event.get(TraceEventFormatConstants.EVENT_PHASE).getAsString()) { | ||||||
| case TraceEventFormatConstants.PHASE_COMPLETE: // Complete events | ||||||
|
|
@@ -193,8 +200,13 @@ public boolean addEvent(JsonObject event) { | |||||
| } | ||||||
|
|
||||||
| public List<CompleteEvent> getCompleteEvents() { | ||||||
| completeEvents.sort(Comparator.comparing((e) -> e.start)); | ||||||
| return ImmutableList.copyOf(completeEvents); | ||||||
| synchronized (this) { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like Is it possible to make the object immutable? If not, is there any guarantee of call order we could enforce? For example, if
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, although Lines 133 to 134 in cdb011d
It definitely wouldn't hurt to, for example, use an immutable list here to ensure we don't break the ordering afterwards
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we make it so the code guarantees correct calling order? For example add a
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a check for this that throws an (also, a test for this)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're right, we were also doing a lot of expensive copying and re-sorting in other places which I missed. Switching to a builder pattern cleaned all of those up, should be done in 9868671 PTAL |
||||||
| if (!sorted) { | ||||||
| completeEvents.sort(Comparator.comparing((e) -> e.start)); | ||||||
| sorted = true; | ||||||
| } | ||||||
| } | ||||||
| return completeEvents; | ||||||
| } | ||||||
|
|
||||||
| public ImmutableMap<String, ImmutableList<CounterEvent>> getCounts() { | ||||||
|
|
||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. are the changes in this file relevant to this PR?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. They are, unfortunately GitHub does a great job of not showing the differences. Essentially the changes are intended to:
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,12 +26,14 @@ | |
| import com.engflow.bazel.invocation.analyzer.time.Timestamp; | ||
| import com.engflow.bazel.invocation.analyzer.traceeventformat.CompleteEvent; | ||
| import com.google.common.annotations.VisibleForTesting; | ||
| import com.google.common.base.Strings; | ||
| import com.google.common.collect.HashMultimap; | ||
| import java.time.Duration; | ||
| import java.util.HashSet; | ||
| import java.util.HashMap; | ||
| import java.util.List; | ||
| import java.util.Set; | ||
| import java.util.regex.Matcher; | ||
| import java.util.regex.Pattern; | ||
| import javax.annotation.Nullable; | ||
|
|
||
| /** | ||
| * A {@link DataProvider} that supplies the duration spent queuing for remote execution within the | ||
|
|
@@ -60,110 +62,174 @@ CriticalPathQueuingDuration getCriticalPathQueuingDuration() | |
| // the relevant name, and further filtering by time interval. | ||
| // Given the matching event, find a queuing event with the same tid and pid that fits | ||
| // within the time interval. | ||
| Set<CompleteEvent> criticalPathEventsInThreads = new HashSet<>(); | ||
| HashMultimap<PidTidKey, CompleteEvent> criticalPathEventsInThreads = HashMultimap.create(); | ||
| if (bazelProfile.getCriticalPath().isEmpty()) { | ||
| return new CriticalPathQueuingDuration(EMPTY_REASON); | ||
| } | ||
| /* | ||
| * Key: critical action path event name. | ||
| * Value: the same critical action path, but in event form. | ||
| * Used to efficiently look up critical path events by their parsed name. | ||
| */ | ||
| HashMap<String, CompleteEvent> cPathActionNameToSelfEvent = new HashMap<>(); | ||
| /* | ||
| * Key: critical action path event name. | ||
| * Values: all profile 'action processing' events that matched by name with the critical path | ||
| * counterpart. | ||
| * Used to keep track, for each critical path event, all the candidates that could be used to | ||
| * find its real process and thread IDs. | ||
| */ | ||
| HashMultimap<String, CompleteEvent> cPathActionNameToEventCandidates = HashMultimap.create(); | ||
| /* | ||
| * Key: PidTidKey (process id and thread id in record form) of the remote queuing event | ||
| * Value: The remote queuing event | ||
| * This is used to efficiently match between a critical path event with the remote queuing | ||
| * event, by the process and thread IDs. | ||
| */ | ||
| HashMultimap<PidTidKey, CompleteEvent> remoteQueuingEvents = HashMultimap.create(); | ||
|
|
||
| for (var cPathEvent : bazelProfile.getCriticalPath().get().getCompleteEvents()) { | ||
|
afrueda97 marked this conversation as resolved.
|
||
| if (Strings.isNullOrEmpty(cPathEvent.name)) { | ||
| continue; | ||
| } | ||
| Matcher m = CRITICAL_PATH_TO_EVENT_NAME.matcher(cPathEvent.name); | ||
| if (!m.matches()) { | ||
| continue; | ||
| } | ||
| var eventNameToFind = m.group(1); | ||
| cPathActionNameToSelfEvent.put(eventNameToFind, cPathEvent); | ||
| } | ||
|
|
||
| // This loop is extremely expensive. Make sure we only perform it once! | ||
| bazelProfile | ||
| .getCriticalPath() | ||
| .get() | ||
| .getCompleteEvents() | ||
| .getThreads() | ||
| .forEach( | ||
| (criticalPathEvent) -> { | ||
| Matcher m = CRITICAL_PATH_TO_EVENT_NAME.matcher(criticalPathEvent.name); | ||
| if (m.matches()) { | ||
| String eventNameToFind = m.group(1); | ||
| bazelProfile | ||
| .getThreads() | ||
| .flatMap((thread) -> thread.getCompleteEvents().stream()) | ||
| // Name should match, and event interval should be contained in | ||
| // criticalPathEvent interval. | ||
| .filter( | ||
| (event) -> | ||
| BazelProfileConstants.CAT_ACTION_PROCESSING.equals(event.category) | ||
| && eventNameToFind.equals(event.name) | ||
| // If "action processing" is the first event, the timestamp | ||
| // may be slightly out of sync with the critical path event. | ||
| && (criticalPathEvent.start.almostEquals(event.start) | ||
| || | ||
| // It may not be the first event, e.g. | ||
| // "action dependency checking" may be reported before | ||
| criticalPathEvent.start.compareTo(event.start) > 0) | ||
| // Keep this always-true-condition for documentation purposes! | ||
| // We have found cases where the end time of the critical path | ||
| // event is less than the end time of the processing event. | ||
| // This might be a bug / inconsistency in Bazel profile writing. | ||
| && (true | ||
| || criticalPathEvent.end.almostEquals(event.end) | ||
| || criticalPathEvent.end.compareTo(event.end) > 0)) | ||
| // We expect to find just one event, but this may not be true for more | ||
| // generic action names. Sort all thus far matching events to find the best | ||
| // match. | ||
| .sorted( | ||
| (a, b) -> { | ||
| boolean aWithinBounds = | ||
| criticalPathEvent.end.almostEquals(a.end) | ||
| || criticalPathEvent.end.compareTo(a.end) > 0; | ||
| boolean bWithinBounds = | ||
| criticalPathEvent.end.almostEquals(b.end) | ||
| || criticalPathEvent.end.compareTo(b.end) > 0; | ||
| if (aWithinBounds && bWithinBounds) { | ||
| // Both events within bounds, prefer the longer one. | ||
| return b.duration.compareTo(a.duration); | ||
| } | ||
| // If one of the events is within the bounds, prefer it. | ||
| if (aWithinBounds) { | ||
| return -1; | ||
| } | ||
| if (bWithinBounds) { | ||
| return 1; | ||
| } | ||
| // Neither event within bounds, prefer the one that extends the bounds | ||
| // least. | ||
| return a.end.compareTo(b.end); | ||
| }) | ||
| .limit(1) | ||
| .forEach( | ||
| e -> { | ||
| // As we could not check the end boundary above, adjust the duration here, | ||
| // so that we can ensure queuing events do not exceed the boundaries of | ||
| // the critical path entry. | ||
| Timestamp end = | ||
| Timestamp.ofMicros( | ||
| Math.min(e.end.getMicros(), criticalPathEvent.end.getMicros())); | ||
| criticalPathEventsInThreads.add( | ||
| new CompleteEvent( | ||
| e.name, | ||
| e.category, | ||
| e.start, | ||
| TimeUtil.getDurationBetween(e.start, end), | ||
| e.threadId, | ||
| e.processId, | ||
| e.args)); | ||
| }); | ||
| thread -> { | ||
| for (var event : thread.getCompleteEvents()) { | ||
| if (Strings.isNullOrEmpty(event.category)) { | ||
| continue; | ||
| } | ||
| switch (event.category) { | ||
| case BazelProfileConstants.CAT_ACTION_PROCESSING -> { | ||
| var cPathEvent = cPathActionNameToSelfEvent.get(event.name); | ||
| if (cPathEvent == null) { | ||
| continue; | ||
| } | ||
| // Found an event that matches a critical path event by name. Add it to the | ||
| // critical | ||
| // path event's list of candidates for later. | ||
| cPathActionNameToEventCandidates.put(event.name, event); | ||
| } | ||
| case BazelProfileConstants.CAT_REMOTE_EXECUTION_QUEUING_TIME -> { | ||
| // We'll need to iterate through these later to sum the queuing time. Keep track | ||
| // of this | ||
| // to avoid having to iterate through the full profile again. | ||
| remoteQueuingEvents.put(new PidTidKey(event.processId, event.threadId), event); | ||
| } | ||
| default -> {} | ||
| } | ||
| } | ||
| }); | ||
| Duration duration = | ||
| bazelProfile | ||
| .getThreads() | ||
| .flatMap((thread) -> thread.getCompleteEvents().stream()) | ||
| // Restrict to queuing events. | ||
| .filter( | ||
| (event) -> | ||
| BazelProfileConstants.CAT_REMOTE_EXECUTION_QUEUING_TIME.equals(event.category)) | ||
| // Restrict to events that are contained in one of the critical path events. | ||
| .filter( | ||
| (event) -> | ||
| criticalPathEventsInThreads.stream() | ||
| .anyMatch( | ||
| (cpEvent) -> | ||
| cpEvent.threadId == event.threadId | ||
| && cpEvent.processId == event.processId | ||
| && (cpEvent.start.compareTo(event.start) <= 0) | ||
| && (event.end.almostEquals(cpEvent.end) | ||
| || (event.end.compareTo(cpEvent.end) <= 0)))) | ||
| .map((event) -> event.duration) | ||
| .reduce(Duration.ZERO, Duration::plus); | ||
|
|
||
| // For each critical path event, loop through its list of candidates that matched by name. | ||
| // This is used to get the correct thread ID and process ID that will later be matched up to | ||
| // the remote queuing events, to calculate critical path queuing. | ||
| for (var cPathEventName : cPathActionNameToEventCandidates.keySet()) { | ||
| var cPathEvent = cPathActionNameToSelfEvent.get(cPathEventName); | ||
| if (cPathEvent == null) { | ||
| continue; | ||
| } | ||
|
|
||
| @Nullable CompleteEvent found = null; | ||
| var foundWithinBounds = false; | ||
| for (CompleteEvent event : cPathActionNameToEventCandidates.get(cPathEventName)) { | ||
| // If "action processing" is the first event, the timestamp | ||
| // may be slightly out of sync with the critical path event. | ||
| // | ||
| // It may not be the first event, e.g. | ||
| // "action dependency checking" may be reported before | ||
| if (!cPathEvent.start.almostEquals(event.start) | ||
| && cPathEvent.start.compareTo(event.start) <= 0) { | ||
| continue; | ||
| } | ||
| // We have found cases where the end time of the critical path event is less than the end | ||
| // time of the processing event. This might be a bug / inconsistency in Bazel profile | ||
| // writing. | ||
| if (!cPathEvent.end.almostEquals(event.end) && cPathEvent.end.compareTo(event.end) <= 0) { | ||
| continue; | ||
| } | ||
|
Comment on lines
+156
to
+163
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This came from L96 on the left, and that check was only there for "documentation purposes".
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're right. I've reverted this to an equivalent of how it originally was (always-false check now). |
||
|
|
||
| if (found == null) { | ||
| found = event; | ||
| foundWithinBounds = | ||
| cPathEvent.end.almostEquals(found.end) || cPathEvent.end.compareTo(found.end) > 0; | ||
| continue; | ||
| } | ||
|
|
||
| // We expect to find just one event, but this may not be true for more generic action | ||
| // names. Sort all thus far matching events to find the best match. | ||
| var eventWithinBounds = | ||
| cPathEvent.end.almostEquals(event.end) || cPathEvent.end.compareTo(event.end) > 0; | ||
|
|
||
| if (foundWithinBounds && eventWithinBounds) { | ||
| // Both events within bounds, prefer the longer one. | ||
| if (event.duration.compareTo(found.duration) > 0) { | ||
| found = event; | ||
| foundWithinBounds = true; | ||
|
afrueda97 marked this conversation as resolved.
Outdated
|
||
| } | ||
| continue; | ||
| } | ||
| // If one of the events is within the bounds, prefer it. | ||
| if (eventWithinBounds) { | ||
| found = event; | ||
| foundWithinBounds = true; | ||
| continue; | ||
| } | ||
| // Neither event within bounds, prefer the one that extends the bounds | ||
| // least. | ||
| if (found.end.compareTo(event.end) < 0) { | ||
| found = event; | ||
| foundWithinBounds = false; | ||
| } | ||
| } | ||
| if (found != null) { | ||
| // As we could not check the end boundary above, adjust the duration here, | ||
| // so that we can ensure queuing events do not exceed the boundaries of | ||
| // the critical path entry. | ||
| Timestamp end = | ||
| Timestamp.ofMicros(Math.min(found.end.getMicros(), cPathEvent.end.getMicros())); | ||
| criticalPathEventsInThreads.put( | ||
| new PidTidKey(found.processId, found.threadId), | ||
| new CompleteEvent( | ||
| found.name, | ||
| found.category, | ||
| found.start, | ||
| TimeUtil.getDurationBetween(found.start, end), | ||
| found.threadId, | ||
| found.processId, | ||
| found.args)); | ||
| } | ||
| } | ||
|
|
||
| Duration duration = Duration.ZERO; | ||
| for (var remoteQueuingEventEntry : remoteQueuingEvents.entries()) { | ||
| var remoteQueuingEvent = remoteQueuingEventEntry.getValue(); | ||
| boolean found = false; | ||
| for (var criticalPathEventInThread : | ||
| criticalPathEventsInThreads.get(remoteQueuingEventEntry.getKey())) { | ||
| if (criticalPathEventInThread.start.compareTo(remoteQueuingEvent.start) <= 0 | ||
| && (remoteQueuingEvent.end.almostEquals(criticalPathEventInThread.end) | ||
| || (remoteQueuingEvent.end.compareTo(criticalPathEventInThread.end) <= 0))) { | ||
| found = true; | ||
| break; | ||
| } | ||
| } | ||
| if (found) { | ||
| duration = duration.plus(remoteQueuingEvent.duration); | ||
| } | ||
| } | ||
| return new CriticalPathQueuingDuration(duration); | ||
| } | ||
|
|
||
| private record PidTidKey(int pid, int tid) {} | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are instances of
ProfileThreadexpected to be accessed concurrently?Since
getCompleteEventssynchronizes onthisto accesssorted, we must do the same here. Or makesortedandAtomicBoolean.If access to some members must be synchronized, then it's likely that we should make all member access thread-safe.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, I think GitHub linked you to the previous commit, this code is gone now in favor of the builder pattern you suggested
(which, being fully immutable after built, this class should also now be safe to access concurrently)