Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -82,6 +83,10 @@ public static BazelProfile createFromInputStream(InputStream inputStream)
new JsonReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8)));
}

public static BazelProfile of(Reader reader) {
return new BazelProfile(new JsonReader(reader));
}

private final BazelVersion bazelVersion;
private final Map<String, String> otherData = new HashMap<>();
private final Map<ThreadId, ProfileThread> threads = new HashMap<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ public class ProfileThread {

@Nullable private String name;
@Nullable private Integer sortIndex;
private boolean sorted;

private final List<JsonObject> extraMetadata;
private final List<JsonObject> extraEvents;
Expand Down Expand Up @@ -111,8 +112,14 @@ public Integer getSortIndex() {
/**
* Parses a {@link JsonObject} as a tracing event and adds it to this thread. Returns {@code true}
* if parsing and adding the event was successful and {@code false} otherwise.
*
* @throws IllegalStateException if the thread has already been sorted, and as such cannot be
* modified anymore.
*/
public boolean addEvent(JsonObject event) {
if (sorted) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are instances of ProfileThread expected to be accessed concurrently?

Since getCompleteEvents synchronizes on this to access sorted, we must do the same here. Or make sorted and AtomicBoolean.

If access to some members must be synchronized, then it's likely that we should make all member access thread-safe.

Copy link
Copy Markdown
Contributor Author

@afrueda97 afrueda97 Oct 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I think GitHub linked you to the previous commit, this code is gone now in favor of the builder pattern you suggested

(which, being fully immutable after built, this class should also now be safe to access concurrently)

throw new IllegalStateException("Cannot add event, Bazel profile thread has been sorted!");
}
try {
switch (event.get(TraceEventFormatConstants.EVENT_PHASE).getAsString()) {
case TraceEventFormatConstants.PHASE_COMPLETE: // Complete events
Expand Down Expand Up @@ -193,8 +200,13 @@ public boolean addEvent(JsonObject event) {
}

public List<CompleteEvent> getCompleteEvents() {
completeEvents.sort(Comparator.comparing((e) -> e.start));
return ImmutableList.copyOf(completeEvents);
synchronized (this) {
Copy link
Copy Markdown
Contributor

@laszlocsomor laszlocsomor Oct 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like completeEvents is actually mutable, and mutated in ProfileThread.addEvent. So if getCompleteEvents is called, then addEvent, then getCompleteEvents again, the second call will return a possibly unsorted list.

Is it possible to make the object immutable?

If not, is there any guarantee of call order we could enforce? For example, if addEvent is never expected to be called after getCompleteEvents, then getCompleteEvents should "freeze" the object (set some flag), no longer allowing mutation.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, although addEvent is only currently called when creating the BazelProfile

// TODO: Use success response to take action on errant events.
profileThread.addEvent(traceEvent);

It definitely wouldn't hurt to, for example, use an immutable list here to ensure we don't break the ordering afterwards

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we make it so the code guarantees correct calling order? For example add a ProfileThread.Builder class with a mutable completeEvents that you can call addEvent on, then the final ProfileThread would have an ImmutableList as completeEvents (and it'd be sorted).

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a check for this that throws an ISE if such modifications are attempted in 5aac8c1.

(also, a test for this)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right, we were also doing a lot of expensive copying and re-sorting in other places which I missed. Switching to a builder pattern cleaned all of those up, should be done in 9868671

PTAL

if (!sorted) {
completeEvents.sort(Comparator.comparing((e) -> e.start));
sorted = true;
}
}
return completeEvents;
}

public ImmutableMap<String, ImmutableList<CounterEvent>> getCounts() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ java_library(
"//analyzer/java/com/engflow/bazel/invocation/analyzer/time",
"//analyzer/java/com/engflow/bazel/invocation/analyzer/traceeventformat",
"//third_party/guava",
"//third_party/jsr305",
],
)

Expand Down
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are the changes in this file relevant to this PR?
like the new variables such as cPathActionNameToSelfEvent and related logic

Copy link
Copy Markdown
Contributor Author

@afrueda97 afrueda97 Oct 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They are, unfortunately GitHub does a great job of not showing the differences.

Essentially the changes are intended to:

  1. Reduce how many loops are performed over bazelProfile.getCriticalPath().get().getCompleteEvents() and .getThreads().flatMap((thread) -> thread.getCompleteEvents().stream()). It should be happening just once for each of these lists now.
  2. "Unroll" complicated stream-based code into simpler for loops, to make it easier to get a sense of its performance and scalability
  3. Use more efficient data structures for lookups, which are the HashMaps and HashMultimaps at the top.

Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,14 @@
import com.engflow.bazel.invocation.analyzer.time.Timestamp;
import com.engflow.bazel.invocation.analyzer.traceeventformat.CompleteEvent;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Strings;
import com.google.common.collect.HashMultimap;
import java.time.Duration;
import java.util.HashSet;
import java.util.HashMap;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.Nullable;

/**
* A {@link DataProvider} that supplies the duration spent queuing for remote execution within the
Expand Down Expand Up @@ -60,110 +62,174 @@ CriticalPathQueuingDuration getCriticalPathQueuingDuration()
// the relevant name, and further filtering by time interval.
// Given the matching event, find a queuing event with the same tid and pid that fits
// within the time interval.
Set<CompleteEvent> criticalPathEventsInThreads = new HashSet<>();
HashMultimap<PidTidKey, CompleteEvent> criticalPathEventsInThreads = HashMultimap.create();
if (bazelProfile.getCriticalPath().isEmpty()) {
return new CriticalPathQueuingDuration(EMPTY_REASON);
}
/*
* Key: critical action path event name.
* Value: the same critical action path, but in event form.
* Used to efficiently look up critical path events by their parsed name.
*/
HashMap<String, CompleteEvent> cPathActionNameToSelfEvent = new HashMap<>();
/*
* Key: critical action path event name.
* Values: all profile 'action processing' events that matched by name with the critical path
* counterpart.
* Used to keep track, for each critical path event, all the candidates that could be used to
* find its real process and thread IDs.
*/
HashMultimap<String, CompleteEvent> cPathActionNameToEventCandidates = HashMultimap.create();
/*
* Key: PidTidKey (process id and thread id in record form) of the remote queuing event
* Value: The remote queuing event
* This is used to efficiently match between a critical path event with the remote queuing
* event, by the process and thread IDs.
*/
HashMultimap<PidTidKey, CompleteEvent> remoteQueuingEvents = HashMultimap.create();

for (var cPathEvent : bazelProfile.getCriticalPath().get().getCompleteEvents()) {
Comment thread
afrueda97 marked this conversation as resolved.
if (Strings.isNullOrEmpty(cPathEvent.name)) {
continue;
}
Matcher m = CRITICAL_PATH_TO_EVENT_NAME.matcher(cPathEvent.name);
if (!m.matches()) {
continue;
}
var eventNameToFind = m.group(1);
cPathActionNameToSelfEvent.put(eventNameToFind, cPathEvent);
}

// This loop is extremely expensive. Make sure we only perform it once!
bazelProfile
.getCriticalPath()
.get()
.getCompleteEvents()
.getThreads()
.forEach(
(criticalPathEvent) -> {
Matcher m = CRITICAL_PATH_TO_EVENT_NAME.matcher(criticalPathEvent.name);
if (m.matches()) {
String eventNameToFind = m.group(1);
bazelProfile
.getThreads()
.flatMap((thread) -> thread.getCompleteEvents().stream())
// Name should match, and event interval should be contained in
// criticalPathEvent interval.
.filter(
(event) ->
BazelProfileConstants.CAT_ACTION_PROCESSING.equals(event.category)
&& eventNameToFind.equals(event.name)
// If "action processing" is the first event, the timestamp
// may be slightly out of sync with the critical path event.
&& (criticalPathEvent.start.almostEquals(event.start)
||
// It may not be the first event, e.g.
// "action dependency checking" may be reported before
criticalPathEvent.start.compareTo(event.start) > 0)
// Keep this always-true-condition for documentation purposes!
// We have found cases where the end time of the critical path
// event is less than the end time of the processing event.
// This might be a bug / inconsistency in Bazel profile writing.
&& (true
|| criticalPathEvent.end.almostEquals(event.end)
|| criticalPathEvent.end.compareTo(event.end) > 0))
// We expect to find just one event, but this may not be true for more
// generic action names. Sort all thus far matching events to find the best
// match.
.sorted(
(a, b) -> {
boolean aWithinBounds =
criticalPathEvent.end.almostEquals(a.end)
|| criticalPathEvent.end.compareTo(a.end) > 0;
boolean bWithinBounds =
criticalPathEvent.end.almostEquals(b.end)
|| criticalPathEvent.end.compareTo(b.end) > 0;
if (aWithinBounds && bWithinBounds) {
// Both events within bounds, prefer the longer one.
return b.duration.compareTo(a.duration);
}
// If one of the events is within the bounds, prefer it.
if (aWithinBounds) {
return -1;
}
if (bWithinBounds) {
return 1;
}
// Neither event within bounds, prefer the one that extends the bounds
// least.
return a.end.compareTo(b.end);
})
.limit(1)
.forEach(
e -> {
// As we could not check the end boundary above, adjust the duration here,
// so that we can ensure queuing events do not exceed the boundaries of
// the critical path entry.
Timestamp end =
Timestamp.ofMicros(
Math.min(e.end.getMicros(), criticalPathEvent.end.getMicros()));
criticalPathEventsInThreads.add(
new CompleteEvent(
e.name,
e.category,
e.start,
TimeUtil.getDurationBetween(e.start, end),
e.threadId,
e.processId,
e.args));
});
thread -> {
for (var event : thread.getCompleteEvents()) {
if (Strings.isNullOrEmpty(event.category)) {
continue;
}
switch (event.category) {
case BazelProfileConstants.CAT_ACTION_PROCESSING -> {
var cPathEvent = cPathActionNameToSelfEvent.get(event.name);
if (cPathEvent == null) {
continue;
}
// Found an event that matches a critical path event by name. Add it to the
// critical
// path event's list of candidates for later.
cPathActionNameToEventCandidates.put(event.name, event);
}
case BazelProfileConstants.CAT_REMOTE_EXECUTION_QUEUING_TIME -> {
// We'll need to iterate through these later to sum the queuing time. Keep track
// of this
// to avoid having to iterate through the full profile again.
remoteQueuingEvents.put(new PidTidKey(event.processId, event.threadId), event);
}
default -> {}
}
}
});
Duration duration =
bazelProfile
.getThreads()
.flatMap((thread) -> thread.getCompleteEvents().stream())
// Restrict to queuing events.
.filter(
(event) ->
BazelProfileConstants.CAT_REMOTE_EXECUTION_QUEUING_TIME.equals(event.category))
// Restrict to events that are contained in one of the critical path events.
.filter(
(event) ->
criticalPathEventsInThreads.stream()
.anyMatch(
(cpEvent) ->
cpEvent.threadId == event.threadId
&& cpEvent.processId == event.processId
&& (cpEvent.start.compareTo(event.start) <= 0)
&& (event.end.almostEquals(cpEvent.end)
|| (event.end.compareTo(cpEvent.end) <= 0))))
.map((event) -> event.duration)
.reduce(Duration.ZERO, Duration::plus);

// For each critical path event, loop through its list of candidates that matched by name.
// This is used to get the correct thread ID and process ID that will later be matched up to
// the remote queuing events, to calculate critical path queuing.
for (var cPathEventName : cPathActionNameToEventCandidates.keySet()) {
var cPathEvent = cPathActionNameToSelfEvent.get(cPathEventName);
if (cPathEvent == null) {
continue;
}

@Nullable CompleteEvent found = null;
var foundWithinBounds = false;
for (CompleteEvent event : cPathActionNameToEventCandidates.get(cPathEventName)) {
// If "action processing" is the first event, the timestamp
// may be slightly out of sync with the critical path event.
//
// It may not be the first event, e.g.
// "action dependency checking" may be reported before
if (!cPathEvent.start.almostEquals(event.start)
&& cPathEvent.start.compareTo(event.start) <= 0) {
continue;
}
// We have found cases where the end time of the critical path event is less than the end
// time of the processing event. This might be a bug / inconsistency in Bazel profile
// writing.
if (!cPathEvent.end.almostEquals(event.end) && cPathEvent.end.compareTo(event.end) <= 0) {
continue;
}
Comment on lines +156 to +163
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This came from L96 on the left, and that check was only there for "documentation purposes".
Now we'll actually do the check. Is that intentional?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right. I've reverted this to an equivalent of how it originally was (always-false check now).


if (found == null) {
found = event;
foundWithinBounds =
cPathEvent.end.almostEquals(found.end) || cPathEvent.end.compareTo(found.end) > 0;
continue;
}

// We expect to find just one event, but this may not be true for more generic action
// names. Sort all thus far matching events to find the best match.
var eventWithinBounds =
cPathEvent.end.almostEquals(event.end) || cPathEvent.end.compareTo(event.end) > 0;

if (foundWithinBounds && eventWithinBounds) {
// Both events within bounds, prefer the longer one.
if (event.duration.compareTo(found.duration) > 0) {
found = event;
foundWithinBounds = true;
Comment thread
afrueda97 marked this conversation as resolved.
Outdated
}
continue;
}
// If one of the events is within the bounds, prefer it.
if (eventWithinBounds) {
found = event;
foundWithinBounds = true;
continue;
}
// Neither event within bounds, prefer the one that extends the bounds
// least.
if (found.end.compareTo(event.end) < 0) {
found = event;
foundWithinBounds = false;
}
}
if (found != null) {
// As we could not check the end boundary above, adjust the duration here,
// so that we can ensure queuing events do not exceed the boundaries of
// the critical path entry.
Timestamp end =
Timestamp.ofMicros(Math.min(found.end.getMicros(), cPathEvent.end.getMicros()));
criticalPathEventsInThreads.put(
new PidTidKey(found.processId, found.threadId),
new CompleteEvent(
found.name,
found.category,
found.start,
TimeUtil.getDurationBetween(found.start, end),
found.threadId,
found.processId,
found.args));
}
}

Duration duration = Duration.ZERO;
for (var remoteQueuingEventEntry : remoteQueuingEvents.entries()) {
var remoteQueuingEvent = remoteQueuingEventEntry.getValue();
boolean found = false;
for (var criticalPathEventInThread :
criticalPathEventsInThreads.get(remoteQueuingEventEntry.getKey())) {
if (criticalPathEventInThread.start.compareTo(remoteQueuingEvent.start) <= 0
&& (remoteQueuingEvent.end.almostEquals(criticalPathEventInThread.end)
|| (remoteQueuingEvent.end.compareTo(criticalPathEventInThread.end) <= 0))) {
found = true;
break;
}
}
if (found) {
duration = duration.plus(remoteQueuingEvent.duration);
}
}
return new CriticalPathQueuingDuration(duration);
}

private record PidTidKey(int pid, int tid) {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ java_test(
"//analyzer/java/com/engflow/bazel/invocation/analyzer/time",
"//analyzer/java/com/engflow/bazel/invocation/analyzer/traceeventformat",
"//analyzer/javatests/com/engflow/bazel/invocation/analyzer:test_base",
"//third_party/gson",
"//third_party/guava",
"//third_party/junit",
"//third_party/truth",
Expand Down
Loading