diff --git a/mooncake-transfer-engine/tent/include/tent/runtime/transport_selector.h b/mooncake-transfer-engine/tent/include/tent/runtime/transport_selector.h index eaa2341f33..22d55ebb03 100644 --- a/mooncake-transfer-engine/tent/include/tent/runtime/transport_selector.h +++ b/mooncake-transfer-engine/tent/include/tent/runtime/transport_selector.h @@ -117,6 +117,19 @@ struct SelectionPolicy { // Transport preference list (evaluated in order) std::vector transports; + + // --- Link-layer QoS attributes (RFC #2519 / #2568, step-1 schema only) --- + // These let a policy carry per-policy fabric QoS instead of the + // process-global MC_IB_SL / MC_IB_TC. nullopt = fall back to the global + // RdmaParams value (today's behavior). InfiniBand Service Level (0-15) and + // Traffic Class / DSCP (0-255). + std::optional service_level; // nullopt = use global default + std::optional traffic_class; // nullopt = use global default + // Named QP pool this policy's traffic should land on. Reserved here so the + // schema is forward-compatible: step 1 only parses/stores it; the actual + // per-class QP pool creation and routing is a follow-up (step 2). Unset = + // the current single "data QP" path. + std::optional qp_pool; }; /** @@ -125,6 +138,12 @@ struct SelectionPolicy { struct SelectionResult { TransportType transport = UNSPEC; uint64_t device_mask = ~0ULL; // Bitmask of allowed devices (~0 = all) + // Resolved link-layer QoS from the matched policy (RFC #2519 / #2568). + // nullopt = use the global RdmaParams default. Carried here for the + // follow-up that applies them at QP setup; step 1 only plumbs the values. 
+ std::optional service_level; + std::optional traffic_class; + std::optional qp_pool; }; /** diff --git a/mooncake-transfer-engine/tent/src/runtime/transport_selector.cpp b/mooncake-transfer-engine/tent/src/runtime/transport_selector.cpp index e26f1e78a0..1b49077079 100644 --- a/mooncake-transfer-engine/tent/src/runtime/transport_selector.cpp +++ b/mooncake-transfer-engine/tent/src/runtime/transport_selector.cpp @@ -206,6 +206,34 @@ void TransportSelector::loadPolicies() { } } + // Parse link-layer QoS attributes (RFC #2519 / #2568, step 1: stored + // only, not yet applied to QPs). Out-of-range values are ignored so a + // bad config never breaks selection. + if (policy_json.contains("service_level")) { + int sl = policy_json.value("service_level", -1); + if (sl >= 0 && sl <= 15) { + policy.service_level = sl; + } else { + LOG(WARNING) << "Ignore service_level in policy " << policy.name + << ", value " << sl << " out of range (0-15)"; + } + } + if (policy_json.contains("traffic_class")) { + int tc = policy_json.value("traffic_class", -1); + if (tc >= 0 && tc <= 255) { + policy.traffic_class = tc; + } else { + LOG(WARNING) << "Ignore traffic_class in policy " << policy.name + << ", value " << tc << " out of range (0-255)"; + } + } + // Reserved for step 2 (per-class QP pools); parsed for forward schema + // compatibility, no effect yet. + if (policy_json.contains("qp_pool")) { + auto& qp = policy_json["qp_pool"]; + if (qp.is_string()) policy.qp_pool = qp.get(); + } + policies_.push_back(std::move(policy)); LOG(INFO) << "Loaded transport policy: " << policy.name << " (segment_type=" << segment_type_str @@ -384,6 +412,13 @@ SelectionResult TransportSelector::select( return result; // UNSPEC, all devices } + // Carry the matched policy's link-layer QoS out to the caller (RFC #2519 / + // #2568, step 1). These are plumbed but not yet applied at QP setup; that + // is the per-class QP pool follow-up (step 2). 
+ result.service_level = matching_policy->service_level; + result.traffic_class = matching_policy->traffic_class; + result.qp_pool = matching_policy->qp_pool; + // Convert device names to mask result.device_mask = ~0ULL; // Default: all devices if (!matching_policy->devices.empty() && topology_) { diff --git a/mooncake-transfer-engine/tent/tests/transport_selector_test.cpp b/mooncake-transfer-engine/tent/tests/transport_selector_test.cpp index a479c8e43a..c08e60643d 100644 --- a/mooncake-transfer-engine/tent/tests/transport_selector_test.cpp +++ b/mooncake-transfer-engine/tent/tests/transport_selector_test.cpp @@ -641,6 +641,77 @@ TEST(TransportSelectorTest, HintNotInMatchingPolicyReturnsUnspec) { EXPECT_EQ(r.transport, UNSPEC); } +// RFC #2519 / #2568 step 1: a policy's link-layer QoS (service_level / +// traffic_class / qp_pool) is parsed from JSON and carried out via +// SelectionResult. (Step 1 only plumbs the values; applying them at QP setup +// is the per-class QP pool follow-up.) 
+TEST(TransportSelectorTest, PolicyLinkLayerQoSIsParsedAndCarried) {
+    // One policy named "kv-critical" that lists "rdma" as its only transport
+    // and sets all three link-layer QoS keys.
+    auto conf = std::make_shared();
+    json policy;
+    policy["name"] = "kv-critical";
+    policy["segment_type"] = "memory";
+    policy["transports"] = {"rdma"};
+    policy["service_level"] = 3;
+    policy["traffic_class"] = 96;
+    policy["qp_pool"] = "kv";
+    conf->set("policy", json::array({policy}));
+
+    // Only the RDMA slot of the transport table is populated.
+    // NOTE(review): setDramToDram(true) presumably marks the transport as
+    // able to serve the CPU-to-CPU context built below -- confirm against
+    // the test transport's definition.
+    TransportSelector selector(conf);
+    std::array, kSupportedTransportTypes>
+        transports{};
+    transports[RDMA] = std::make_shared(RDMA);
+    static_cast(transports[RDMA].get())->setDramToDram(true);
+
+    // Context for a cross-machine, CPU-to-CPU memory segment that names the
+    // policy configured above.
+    std::vector buffer_transports = {RDMA};
+    SelectionContext ctx;
+    ctx.segment_type = SegmentType::Memory;
+    ctx.same_machine = false;
+    ctx.local_memory_type = MTYPE_CPU;
+    ctx.remote_memory_type = MTYPE_CPU;
+    ctx.buffer_transports = &buffer_transports;
+    ctx.policy_name = "kv-critical";
+
+    // Each ASSERT_TRUE stops the test on failure, so the .value() call that
+    // follows it is only reached when the optional actually holds a value.
+    // NOTE(review): r.transport is not checked here; this test only covers
+    // the QoS fields of the result.
+    auto r = selector.select(ctx, transports, /*index=*/0);
+    ASSERT_TRUE(r.service_level.has_value());
+    EXPECT_EQ(r.service_level.value(), 3);
+    ASSERT_TRUE(r.traffic_class.has_value());
+    EXPECT_EQ(r.traffic_class.value(), 96);
+    ASSERT_TRUE(r.qp_pool.has_value());
+    EXPECT_EQ(r.qp_pool.value(), "kv");
+}
+
+// Out-of-range SL/TC are ignored (left as nullopt) so a bad config never
+// changes selection behavior.
+TEST(TransportSelectorTest, PolicyLinkLayerQoSOutOfRangeIgnored) {
+    // Same policy shape as a valid one, but both numeric QoS keys carry
+    // integers above their upper bound. No "qp_pool" key is set.
+    // NOTE(review): only over-range integers are exercised. Negative numbers
+    // and non-numeric JSON (e.g. the string "3") are not covered; the loader
+    // reads these keys with policy_json.value(key, -1), which -- if this is
+    // nlohmann::json -- throws on a type mismatch instead of ignoring the
+    // value. TODO confirm and add a case.
+    auto conf = std::make_shared();
+    json policy;
+    policy["name"] = "bad-qos";
+    policy["segment_type"] = "memory";
+    policy["transports"] = {"rdma"};
+    policy["service_level"] = 99;    // > 15, invalid
+    policy["traffic_class"] = 9999;  // > 255, invalid
+    conf->set("policy", json::array({policy}));
+
+    // Only the RDMA slot of the transport table is populated.
+    TransportSelector selector(conf);
+    std::array, kSupportedTransportTypes>
+        transports{};
+    transports[RDMA] = std::make_shared(RDMA);
+    static_cast(transports[RDMA].get())->setDramToDram(true);
+
+    // Context for a cross-machine, CPU-to-CPU memory segment that names the
+    // "bad-qos" policy.
+    std::vector buffer_transports = {RDMA};
+    SelectionContext ctx;
+    ctx.segment_type = SegmentType::Memory;
+    ctx.same_machine = false;
+    ctx.local_memory_type = MTYPE_CPU;
+    ctx.remote_memory_type = MTYPE_CPU;
+    ctx.buffer_transports = &buffer_transports;
+    ctx.policy_name = "bad-qos";
+
+    // Both rejected values must come back unset.
+    // NOTE(review): neither r.transport nor r.qp_pool is checked, so this
+    // test does not by itself show that selection is otherwise unaffected.
+    auto r = selector.select(ctx, transports, /*index=*/0);
+    EXPECT_FALSE(r.service_level.has_value());
+    EXPECT_FALSE(r.traffic_class.has_value());
+}
+
 }  // namespace
 }  // namespace tent
 }  // namespace mooncake