Skip to content

Commit 16a899d

Browse files
authored
fix(NODE-7430): throw timeout error when withTransaction retries exceed deadline (#4897)
1 parent 1fc0e09 commit 16a899d

17 files changed

Lines changed: 571 additions & 124 deletions

src/sessions.ts

Lines changed: 106 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import {
1717
MongoErrorLabel,
1818
MongoExpiredSessionError,
1919
MongoInvalidArgumentError,
20+
MongoOperationTimeoutError,
2021
MongoRuntimeError,
2122
MongoServerError,
2223
MongoTransactionError,
@@ -725,7 +726,7 @@ export class ClientSession
725726
timeoutMS?: number;
726727
}
727728
): Promise<T> {
728-
const MAX_TIMEOUT = 120000;
729+
const MAX_TIMEOUT = 120_000;
729730

730731
const timeoutMS = options?.timeoutMS ?? this.timeoutMS ?? null;
731732
this.timeoutContext =
@@ -737,10 +738,27 @@ export class ClientSession
737738
})
738739
: null;
739740

740-
// 1. Record the current monotonic time, which will be used to enforce the 120-second timeout before later retry attempts.
741-
const startTime = this.timeoutContext?.csotEnabled() // This is strictly to appease TS. We must narrow the context to a CSOT context before accessing `.start`.
742-
? this.timeoutContext.start
743-
: processTimeMS();
741+
// 1. Define the following:
742+
// 1.1 Record the current monotonic time, which will be used to enforce the 120-second / CSOT timeout before later retry attempts.
743+
// 1.2 Set `transactionAttempt` to `0`.
744+
// 1.3 Set `TIMEOUT_MS` to be `timeoutMS` if given, otherwise MAX_TIMEOUT (120-seconds).
745+
746+
// Timeout Error propagation
747+
// When the previously encountered error needs to be propagated because there is no more time for another attempt,
748+
// and it is not already a timeout error, then:
749+
// - A timeout error MUST be propagated instead. It MUST expose the previously encountered error as specified in
750+
// the "Errors" section of the CSOT specification.
751+
// - If exposing the previously encountered error from a timeout error is impossible in a driver, then the driver
752+
// is exempt from the requirement and MUST propagate the previously encountered error as is. The timeout error
753+
// MUST copy all error labels from the previously encountered error.
754+
755+
// The spec describes timeout checks as "elapsed time < TIMEOUT_MS" (where elapsed = now - start).
756+
// We precompute `deadline = now + remainingTimeMS` so each check becomes simply `now < deadline`.
757+
const csotEnabled = !!this.timeoutContext?.csotEnabled();
758+
const remainingTimeMS = this.timeoutContext?.csotEnabled()
759+
? this.timeoutContext.remainingTimeMS
760+
: MAX_TIMEOUT;
761+
const deadline = processTimeMS() + remainingTimeMS;
744762

745763
let committed = false;
746764
let result: T;
@@ -749,20 +767,20 @@ export class ClientSession
749767

750768
try {
751769
retryTransaction: for (
752-
// 2. Set `transactionAttempt` to `0`.
753770
let transactionAttempt = 0, isRetry = false;
754771
!committed;
755772
++transactionAttempt, isRetry = transactionAttempt > 0
756773
) {
757774
// 2. If `transactionAttempt` > 0:
758775
if (isRetry) {
759-
// 2.i If elapsed time + `backoffMS` > `TIMEOUT_MS`, then raise the previously encountered error. If the elapsed time of
760-
// `withTransaction` is less than TIMEOUT_MS, calculate the backoffMS to be
761-
// `jitter * min(BACKOFF_INITIAL * 1.5 ** (transactionAttempt - 1), BACKOFF_MAX)`. sleep for `backoffMS`.
762-
// 2.i.i jitter is a random float between \[0, 1)
763-
// 2.i.ii `transactionAttempt` is the variable defined in step 1.
764-
// 2.i.iii `BACKOFF_INITIAL` is 5ms
765-
// 2.i.iv `BACKOFF_MAX` is 500ms
776+
// 2.1 Calculate backoffMS to be jitter * min(BACKOFF_INITIAL * 1.5 ** (transactionAttempt - 1), BACKOFF_MAX).
777+
// If elapsed time + backoffMS > TIMEOUT_MS, then propagate the previously encountered error to the caller of
778+
// withTransaction as per timeout error propagation and return immediately. Otherwise, sleep for backoffMS.
779+
// 2.1.1 jitter is a random float between [0, 1), optionally including 1, depending on what is most natural
780+
// for the given driver language.
781+
// 2.1.2 transactionAttempt is the variable defined in step 1.
782+
// 2.1.3 BACKOFF_INITIAL is 5ms
783+
// 2.1.4 BACKOFF_MAX is 500ms
766784
const BACKOFF_INITIAL_MS = 5;
767785
const BACKOFF_MAX_MS = 500;
768786
const BACKOFF_GROWTH = 1.5;
@@ -774,136 +792,123 @@ export class ClientSession
774792
BACKOFF_MAX_MS
775793
);
776794

777-
const willExceedTransactionDeadline =
778-
(this.timeoutContext?.csotEnabled() &&
779-
backoffMS > this.timeoutContext.remainingTimeMS) ||
780-
processTimeMS() + backoffMS > startTime + MAX_TIMEOUT;
781-
782-
if (willExceedTransactionDeadline) {
783-
throw (
795+
if (processTimeMS() + backoffMS >= deadline) {
796+
throw makeTimeoutError(
784797
lastError ??
785-
new MongoRuntimeError(
786-
`Transaction retry did not record an error: should never occur. Please file a bug.`
787-
)
798+
new MongoRuntimeError(
799+
`Transaction retry did not record an error: should never occur. Please file a bug.`
800+
),
801+
csotEnabled
788802
);
789803
}
790804

791805
await setTimeout(backoffMS);
792806
}
793807

794-
// 3. Invoke startTransaction on the session
795-
// 4. If `startTransaction` reported an error, propagate that error to the caller of `withTransaction` and return immediately.
796-
this.startTransaction(options); // may throw on error
808+
// 3. Invoke startTransaction on the session and increment transactionAttempt. If TransactionOptions were
809+
// specified in the call to withTransaction, those MUST be used for startTransaction. Note that
810+
// ClientSession.defaultTransactionOptions will be used in the absence of any explicit TransactionOptions.
811+
// 4. If startTransaction reported an error, propagate that error to the caller of withTransaction as is and
812+
// return immediately.
813+
this.startTransaction(options);
797814

798815
try {
799-
// 5. Invoke the callback.
800-
// 6. Control returns to withTransaction. (continued below)
816+
// 5. Invoke the callback. Drivers MUST ensure that the ClientSession can be accessed within the callback
817+
// (e.g. pass ClientSession as the first parameter, rely on lexical scoping). Drivers MAY pass additional
818+
// parameters as needed (e.g. user data solicited by withTransaction).
801819
const promise = fn(this);
802820
if (!isPromiseLike(promise)) {
803821
throw new MongoInvalidArgumentError(
804822
'Function provided to `withTransaction` must return a Promise'
805823
);
806824
}
807825

826+
// 6. Control returns to withTransaction. Determine the current state of the ClientSession and whether the
827+
// callback reported an error (e.g. thrown exception, error output parameter).
808828
result = await promise;
809829

810-
// 6. (cont.) Determine the current state of the ClientSession (continued below)
830+
// 8. If the ClientSession is in the "no transaction", "transaction aborted", or "transaction committed"
831+
// state, assume the callback intentionally aborted or committed the transaction and return immediately.
811832
if (
812833
this.transaction.state === TxnState.NO_TRANSACTION ||
813834
this.transaction.state === TxnState.TRANSACTION_COMMITTED ||
814835
this.transaction.state === TxnState.TRANSACTION_ABORTED
815836
) {
816-
// 8. If the ClientSession is in the "no transaction", "transaction aborted", or "transaction committed" state,
817-
// assume the callback intentionally aborted or committed the transaction and return immediately.
818837
return result;
819838
}
820-
// 5. (cont.) and whether the callback reported an error
821-
// 7. If the callback reported an error:
822839
} catch (fnError) {
840+
// 7. If the callback reported an error
823841
if (!(fnError instanceof MongoError) || fnError instanceof MongoInvalidArgumentError) {
824842
// This first preemptive abort regardless of TxnState isn't spec,
825843
// and it's unclear whether it's serving a practical purpose, but this logic is OLD
826844
await this.abortTransaction();
827845
throw fnError;
828846
}
829847

848+
lastError = fnError;
849+
850+
// 7.1 If the ClientSession is in the "starting transaction" or "transaction in progress"
851+
// state, invoke abortTransaction on the session.
830852
if (
831853
this.transaction.state === TxnState.STARTING_TRANSACTION ||
832854
this.transaction.state === TxnState.TRANSACTION_IN_PROGRESS
833855
) {
834-
// 7.i If the ClientSession is in the "starting transaction" or "transaction in progress" state,
835-
// invoke abortTransaction on the session
836856
await this.abortTransaction();
837857
}
838858

839-
if (
840-
fnError.hasErrorLabel(MongoErrorLabel.TransientTransactionError) &&
841-
(this.timeoutContext?.csotEnabled() || processTimeMS() - startTime < MAX_TIMEOUT)
842-
) {
843-
// 7.ii If the callback's error includes a "TransientTransactionError" label and the elapsed time of `withTransaction`
844-
// is less than 120 seconds, jump back to step two.
845-
lastError = fnError;
859+
// 7.2 If the callback's error includes a "TransientTransactionError" label, jump back to step two.
860+
if (fnError.hasErrorLabel(MongoErrorLabel.TransientTransactionError)) {
861+
if (processTimeMS() >= deadline) {
862+
throw makeTimeoutError(lastError, csotEnabled);
863+
}
846864
continue retryTransaction;
847865
}
848866

849-
// 7.iii If the callback's error includes a "UnknownTransactionCommitResult" label, the callback must have manually committed a transaction,
850-
// propagate the callback's error to the caller of withTransaction and return immediately.
851-
// The 7.iii check is redundant with 6.iv, so we don't write code for it
852-
// 7.iv Otherwise, propagate the callback's error to the caller of withTransaction and return immediately.
867+
// 7.3 If the callback's error includes a "UnknownTransactionCommitResult" label, the callback must
868+
// have manually committed a transaction, propagate the callback's error to the caller of withTransaction
869+
// as is and return immediately.
870+
// 7.4 Otherwise, propagate the callback's error to the caller of withTransaction as is and return immediately.
853871
throw fnError;
854872
}
855873

856874
retryCommit: while (!committed) {
857875
try {
858-
/*
859-
* We will rely on ClientSession.commitTransaction() to
860-
* apply a majority write concern if commitTransaction is
861-
* being retried (see: DRIVERS-601)
862-
*/
863876
// 9. Invoke commitTransaction on the session.
864877
await this.commitTransaction();
865878
committed = true;
866-
// 10. If commitTransaction reported an error:
867879
} catch (commitError) {
868-
// If CSOT is enabled, we repeatedly retry until timeoutMS expires. This is enforced by providing a
869-
// timeoutContext to each async API, which know how to cancel themselves (i.e., the next retry will
870-
// abort the withTransaction call).
871-
// If CSOT is not enabled, do we still have time remaining or have we timed out?
872-
const hasTimedOut =
873-
!this.timeoutContext?.csotEnabled() && processTimeMS() - startTime >= MAX_TIMEOUT;
874-
875-
if (!hasTimedOut) {
876-
/*
877-
* Note: a maxTimeMS error will have the MaxTimeMSExpired
878-
* code (50) and can be reported as a top-level error or
879-
* inside writeConcernError, ex.
880-
* { ok:0, code: 50, codeName: 'MaxTimeMSExpired' }
881-
* { ok:1, writeConcernError: { code: 50, codeName: 'MaxTimeMSExpired' } }
882-
*/
883-
if (
884-
!isMaxTimeMSExpiredError(commitError) &&
885-
commitError.hasErrorLabel(MongoErrorLabel.UnknownTransactionCommitResult)
886-
) {
887-
// 10.i If the `commitTransaction` error includes a "UnknownTransactionCommitResult" label and the error is not
888-
// MaxTimeMSExpired and the elapsed time of `withTransaction` is less than 120 seconds, jump back to step eight.
889-
continue retryCommit;
880+
// 10. If commitTransaction reported an error:
881+
lastError = commitError;
882+
883+
// 10.1 If the commitTransaction error includes a UnknownTransactionCommitResult label and the error is
884+
// not MaxTimeMSExpired
885+
if (
886+
commitError.hasErrorLabel(MongoErrorLabel.UnknownTransactionCommitResult) &&
887+
!isMaxTimeMSExpiredError(commitError)
888+
) {
889+
// 10.1.1 If the elapsed time of withTransaction exceeded TIMEOUT_MS, propagate the commitTransaction
890+
// error to the caller of withTransaction as per timeout error propagation and return immediately.
891+
if (processTimeMS() >= deadline) {
892+
throw makeTimeoutError(commitError, csotEnabled);
890893
}
894+
// 10.1.2 Otherwise, jump back to step nine. We will trust commitTransaction to apply a majority write
895+
// concern on retry attempts (see: Majority write concern is used when retrying commitTransaction).
896+
continue retryCommit;
897+
}
891898

892-
if (commitError.hasErrorLabel(MongoErrorLabel.TransientTransactionError)) {
893-
// 10.ii If the commitTransaction error includes a "TransientTransactionError" label
894-
// and the elapsed time of withTransaction is less than 120 seconds, jump back to step two.
895-
lastError = commitError;
896-
897-
continue retryTransaction;
898-
}
899+
// 10.2 If the commitTransaction error includes a TransientTransactionError label, jump back to step two.
900+
if (commitError.hasErrorLabel(MongoErrorLabel.TransientTransactionError)) {
901+
continue retryTransaction;
899902
}
900903

901-
// 10.iii Otherwise, propagate the commitTransaction error to the caller of withTransaction and return immediately.
904+
// 10.3 Otherwise, propagate the commitTransaction error to the caller of withTransaction as is and return
905+
// immediately.
902906
throw commitError;
903907
}
904908
}
905909
}
906910

911+
// 11. The transaction was committed successfully. Return immediately.
907912
// @ts-expect-error Result is always defined if we reach here, the for-loop above convinces TS it is not.
908913
return result;
909914
} finally {
@@ -912,6 +917,25 @@ export class ClientSession
912917
}
913918
}
914919

920+
function makeTimeoutError(cause: Error, csotEnabled: boolean): Error {
921+
// Async APIs know how to cancel themselves and might return CSOT error
922+
if (cause instanceof MongoOperationTimeoutError) {
923+
return cause;
924+
}
925+
if (csotEnabled) {
926+
const timeoutError = new MongoOperationTimeoutError('Timed out during withTransaction', {
927+
cause
928+
});
929+
if (cause instanceof MongoError) {
930+
for (const label of cause.errorLabels) {
931+
timeoutError.addErrorLabel(label);
932+
}
933+
}
934+
return timeoutError;
935+
}
936+
return cause;
937+
}
938+
915939
const NON_DETERMINISTIC_WRITE_CONCERN_ERRORS = new Set([
916940
'CannotSatisfyWriteConcern',
917941
'UnknownReplWriteConcern',

0 commit comments

Comments
 (0)