diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7a6642965b..db0a77a677 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -148,7 +148,7 @@ jobs: export MOONCAKE_STORE_LIB_DIR=$GITHUB_WORKSPACE/build/mooncake-store/src export MOONCAKE_STORE_INCLUDE_DIR=$GITHUB_WORKSPACE/mooncake-store/include # This job builds Mooncake with -DENABLE_ASAN=ON, so the C++ libraries - # the Rust crate links against carry undefined __asan_* references. Opt + # the Rust package links against carry undefined __asan_* references. Opt # in to linking the ASan runtime; build.rs emits -lasan first, which # keeps libasan first in the initial library list as ASan requires. # Non-sanitized builds leave this unset and link without ASan. @@ -179,7 +179,7 @@ jobs: export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/build/mooncake-common:$GITHUB_WORKSPACE/build/mooncake-store/src:$GITHUB_WORKSPACE/build/mooncake-transfer-engine/src:$GITHUB_WORKSPACE/build/mooncake-transfer-engine/src/common/base:$GITHUB_WORKSPACE/build/mooncake-common/etcd export CGO_ENABLED=1 export CGO_CFLAGS="-I$GITHUB_WORKSPACE/mooncake-store/include -I$GITHUB_WORKSPACE/mooncake-transfer-engine/include" - export CGO_LDFLAGS="-L$GITHUB_WORKSPACE/build/mooncake-store/src -L$GITHUB_WORKSPACE/build/mooncake-store/src/cachelib_memory_allocator -L$GITHUB_WORKSPACE/build/mooncake-transfer-engine/src -L$GITHUB_WORKSPACE/build/mooncake-transfer-engine/src/common/base -L$GITHUB_WORKSPACE/build/mooncake-common -L$GITHUB_WORKSPACE/build/mooncake-common/etcd -lmooncake_store -lcachelib_memory_allocator -ltransfer_engine -lbase -lasio -letcd_wrapper -lstdc++ -lnuma -lglog -lgflags -libverbs -lmlx5 -ljsoncpp -lzstd -lcurl -luring -lasan -lm -lgcov -lxxhash" + export CGO_LDFLAGS="-L$GITHUB_WORKSPACE/build/mooncake-store/src -L$GITHUB_WORKSPACE/build/mooncake-store/src/cachelib_memory_allocator -L$GITHUB_WORKSPACE/build/mooncake-transfer-engine/src -L$GITHUB_WORKSPACE/build/mooncake-transfer-engine/src/common/base -L$GITHUB_WORKSPACE/build/mooncake-common -L$GITHUB_WORKSPACE/build/mooncake-common/etcd -lmooncake_store -lcachelib_memory_allocator -ltransfer_engine -lbase -lasio -letcd_wrapper -lstdc++ -lnuma -lglog -lgflags -libverbs -lmlx5 -ljsoncpp -lzstd -lcurl -luring -lasan -lm -lgcov -lxxhash -lyaml-cpp" # Link cudart if CUDA is available (needed for D2H staging in mooncake_store) if [ -d /usr/local/cuda/lib64 ]; then export CGO_LDFLAGS="$CGO_LDFLAGS -L/usr/local/cuda/lib64 -lcudart"; fi ASAN_OPTIONS=detect_leaks=0:verify_asan_link_order=0 MC_METADATA_SERVER=http://127.0.0.1:8080/metadata go test -v ./tests/... diff --git a/docs/source/deployment/mooncake-store-deployment-guide.md b/docs/source/deployment/mooncake-store-deployment-guide.md index 9af983edc7..594a74839c 100644 --- a/docs/source/deployment/mooncake-store-deployment-guide.md +++ b/docs/source/deployment/mooncake-store-deployment-guide.md @@ -256,6 +256,7 @@ When tenant quota is enabled, `/metrics` also includes per-tenant quota gauges a - `mooncake_tenant_quota_used_bytes{tenant_id}` - `mooncake_tenant_quota_reserved_bytes{tenant_id}` - `mooncake_tenant_quota_committed_count{tenant_id}` +- `mooncake_tenant_quota_metadata_object_count{tenant_id}` - `mooncake_tenant_quota_over_quota{tenant_id}` - `mooncake_tenant_quota_explicit_policy{tenant_id}` - `mooncake_tenant_quota_reject_total{tenant_id,reason}` @@ -268,16 +269,29 @@ When tenant quota is enabled, `/metrics` also includes per-tenant quota gauges a ## Tenant Quota Management -Tenant quota admission is disabled by default. Enable it on the master when you want memory writes admitted against per-tenant quota: +Tenant quota admission is disabled by default. Enable strict multi-tenant mode on the master when you want memory writes admitted against connector-managed per-tenant quota: ```bash mooncake_master \ - --enable_tenant_quota=true \ - --default_tenant_quota_bytes=1073741824 \ - --tenant_quota_pool_capacity_bytes=0 + --enable_multi_tenants=true \ + --tenant_quota_connector_type=file \ + --tenant_quota_connector_uri=/etc/mooncake/tenant_quotas.yaml ``` -`tenant_quota_pool_capacity_bytes=0` uses the full registered memory capacity as the quota allocation pool. A nonzero value caps the capacity used to compute effective tenant quotas. +The v1 connector is a writable YAML file. The file must use schema version `1`; tenant names must be non-empty, unique, must not start with `_`, and must not contain NUL or control characters; quotas must be positive integers with optional `B`, `KB`, `MB`, `GB`, or `TB` units: + +```yaml +version: 1 + +tenants: + - name: tenant-a + quota: 200GB + + - name: tenant-b + quota: 500GB +``` + +When strict multi-tenant mode is enabled, write requests must include a registered tenant. The `default` tenant is not special unless it is explicitly registered in the connector policy. The same HTTP port used for metrics exposes the tenant quota admin API: @@ -293,14 +307,8 @@ curl -s -X PUT "http://:9003/api/v1/tenant_quotas?tenant_id=tenant- -H 'Content-Type: application/json' \ -d '{"requested_quota_bytes":2147483648}' -# Delete an explicit policy so the tenant inherits the default policy again. +# Delete an explicit policy. The tenant must not own objects or quota usage. curl -s -X DELETE "http://:9003/api/v1/tenant_quotas?tenant_id=tenant-a" - -# Query or update the default requested quota. The default may be 0. -curl -s http://:9003/api/v1/tenant_quotas/default -curl -s -X PUT http://:9003/api/v1/tenant_quotas/default \ - -H 'Content-Type: application/json' \ - -d '{"requested_quota_bytes":1073741824}' ``` Each tenant quota snapshot returns: @@ -315,13 +323,14 @@ Each tenant quota snapshot returns: "used_bytes": 0, "reserved_bytes": 0, "committed_count": 0, + "metadata_object_count": 0, "over_quota": false, "has_explicit_policy": true } } ``` -In HA mode, quota admin requests are served only by the active master service. Standby, candidate, or inactive services return HTTP 503. If tenant quota is disabled, the quota admin API returns HTTP 409 with `UNAVAILABLE_IN_CURRENT_MODE`. +In HA mode, quota admin requests are served only by the active master service. Standby, candidate, or inactive services return HTTP 503. If strict multi-tenant mode is disabled, the quota admin API returns HTTP 409 with `UNAVAILABLE_IN_CURRENT_MODE`. Deleting a non-empty tenant returns HTTP 409 with `TENANT_NOT_EMPTY`. --- @@ -465,9 +474,9 @@ mooncake_master \ | Flag | Default | Description | |------|---------|-------------| -| `--enable_tenant_quota` | `false` | Enable per-tenant memory quota admission | -| `--default_tenant_quota_bytes` | `0` | Default requested quota for tenants without explicit policy; `0` is allowed and inherited-default tenants still share capacity left by explicit tenants | -| `--tenant_quota_pool_capacity_bytes` | `0` | Capacity used to compute effective tenant quotas; `0` means total registered memory capacity | +| `--enable_multi_tenants` | `false` | Enable strict tenant registration and per-tenant memory quota admission | +| `--tenant_quota_connector_type` | `file` | Tenant quota policy connector type | +| `--tenant_quota_connector_uri` | empty | Connector URI; for `file`, the writable YAML policy path | ### High Availability diff --git a/docs/source/design/mooncake-store.md b/docs/source/design/mooncake-store.md index 5a840d1d36..2ebb67bd6e 100644 --- a/docs/source/design/mooncake-store.md +++ b/docs/source/design/mooncake-store.md @@ -93,33 +93,43 @@ To reduce cache warm-up time after a master restart, the Master Service supports ### Tenant Quota -The Master Service can optionally enforce memory quota admission per tenant. This feature is disabled by default. When `enable_tenant_quota=false`, existing allocation and eviction behavior is preserved, and tenant quota management requests return `UNAVAILABLE_IN_CURRENT_MODE`. +The Master Service can optionally enforce strict multi-tenant memory quota admission. This feature is disabled by default. When `enable_multi_tenants=false`, request tenant IDs are ignored for object placement, all objects use the `default` namespace, and tenant quota management requests return `UNAVAILABLE_IN_CURRENT_MODE`. -When tenant quota is enabled, each tenant has a requested quota policy and an effective quota. Explicit tenant policies are set through the master admin HTTP API. Tenants without an explicit policy inherit the default requested quota. The default policy may be `0`; explicit tenant policies must be positive. +When strict multi-tenant mode is enabled, the tenant quota policy is loaded from the configured connector. The v1 connector is a writable YAML file configured by `tenant_quota_connector_type=file` and `tenant_quota_connector_uri=`. Tenants must be explicitly present in that connector policy before they can write. Missing tenants, empty tenants, and an unregistered `default` tenant are rejected with `TENANT_NOT_REGISTERED`. -Effective quota is recomputed from the current registered memory capacity and the optional tenant quota pool cap: +The YAML policy uses schema version `1`: -- The allocatable capacity is the smaller of total registered memory and `tenant_quota_pool_capacity_bytes`; a pool cap of `0` means total registered memory. -- If explicit tenant requests fit within the allocatable capacity, explicit tenants receive their requested quotas and the remaining capacity is split evenly among active inherited-default tenants. -- If explicit tenant requests exceed the allocatable capacity, only explicit tenants receive quota, scaled proportionally by request size. Inherited-default tenants receive `0` effective quota until capacity is available. +```yaml +version: 1 + +tenants: + - name: tenant-a + quota: 200GB +``` + +Tenant names must be non-empty, unique, must not start with `_`, and must not contain NUL or control characters. Quotas must be positive integers and may use `B`, `KB`, `MB`, `GB`, or `TB` units. + +Effective quota is recomputed from the current registered memory capacity: + +- If explicit tenant requests fit within the registered memory capacity, tenants receive their requested quotas and remaining capacity stays unallocated. +- If explicit tenant requests exceed registered memory capacity, explicit tenants receive quota scaled proportionally by request size. - Remainders are assigned deterministically by tenant ID, so repeated recomputes produce stable results. +- Tenants present in restored metadata but missing from the connector policy become in-memory orphans with requested quota `0`, effective quota `0`, and `over_quota=true` while they still own metadata. Reads and removals are allowed so operators can clean them up; writes remain blocked until the tenant is re-registered or emptied. `PutStart` and size-changing `UpsertStart` charge quota before memory is allocated. If the first reservation fails, the master performs tenant-scoped memory eviction for the target tenant and retries the reservation. The retry is bounded to two eviction attempts. Tenant quota eviction scans only the target tenant, skips hard-pinned objects, honors soft-pin eviction configuration, and preserves grouped-object lease safety checks. -The admin HTTP API exposes: +Admin policy changes are persisted before the final in-memory policy is applied. `PUT` writes the connector first and then applies the policy in memory. `DELETE` first marks the tenant unregistered in memory to block concurrent writes, verifies the tenant is empty, writes the connector, and rolls back the in-memory mark if the connector write fails. The admin HTTP API exposes: | Method | Path | Description | |--------|------|-------------| | `GET` | `/api/v1/tenant_quotas` | List quota snapshots for active or explicit tenants | | `GET` | `/api/v1/tenant_quotas?tenant_id=` | Query one tenant quota snapshot | -| `PUT` | `/api/v1/tenant_quotas?tenant_id=` | Upsert an explicit tenant quota policy | -| `DELETE` | `/api/v1/tenant_quotas?tenant_id=` | Delete an explicit tenant quota policy | -| `GET` | `/api/v1/tenant_quotas/default` | Query the default requested quota policy | -| `PUT` | `/api/v1/tenant_quotas/default` | Set the default requested quota policy | +| `PUT` | `/api/v1/tenant_quotas?tenant_id=` | Create or update a tenant quota policy | +| `DELETE` | `/api/v1/tenant_quotas?tenant_id=` | Delete an empty tenant quota policy | -Tenant quota snapshots include `tenant_id`, `requested_quota_bytes`, `effective_quota_bytes`, `used_bytes`, `reserved_bytes`, `committed_count`, `over_quota`, and `has_explicit_policy`. +Tenant quota snapshots include `tenant_id`, `requested_quota_bytes`, `effective_quota_bytes`, `used_bytes`, `reserved_bytes`, `committed_count`, `metadata_object_count`, `over_quota`, and `has_explicit_policy`. -When snapshots are enabled, tenant quota policies are written to a separate `tenant_quota_policy` snapshot object. This file stores only requested policy: the default requested quota and explicit tenant requested quotas. Effective quota, usage, reservations, and committed object charge are rebuilt from restored metadata and current capacity. Older snapshots without `tenant_quota_policy` remain valid and use the configured default quota on restore. +Snapshots restore object runtime state only. Tenant quota policy is always loaded from the connector after metadata restore, then usage and effective quota are rebuilt from restored metadata and current registered capacity. If the connector cannot be loaded in strict multi-tenant mode, startup fails. ### Master Service APIs diff --git a/mooncake-store/conf/master.yaml b/mooncake-store/conf/master.yaml index 02d06b9e05..2b772e3ab8 100644 --- a/mooncake-store/conf/master.yaml +++ b/mooncake-store/conf/master.yaml @@ -13,6 +13,10 @@ allow_evict_soft_pinned_objects: true eviction_ratio: 0.1 eviction_high_watermark_ratio: 1.0 +enable_multi_tenants: false +tenant_quota_connector_type: "file" +tenant_quota_connector_uri: "" + enable_ha: false etcd_endpoints: "http://localhost:2379" root_fs_dir: "" diff --git a/mooncake-store/go/build.sh b/mooncake-store/go/build.sh index 52304cc7e3..ac190e8b1a 100755 --- a/mooncake-store/go/build.sh +++ b/mooncake-store/go/build.sh @@ -41,7 +41,7 @@ CGO_LDFLAGS+=" -L${BUILD_DIR}/mooncake-transfer-engine/src" CGO_LDFLAGS+=" -L${BUILD_DIR}/mooncake-transfer-engine/src/common/base" CGO_LDFLAGS+=" -L${BUILD_DIR}/mooncake-common" CGO_LDFLAGS+=" -L${BUILD_DIR}/mooncake-common/src" -CGO_LDFLAGS+=" -lmooncake_store -lcachelib_memory_allocator -ltransfer_engine -lbase -lasio -lmooncake_common -lxxhash" +CGO_LDFLAGS+=" -lmooncake_store -lcachelib_memory_allocator -ltransfer_engine -lbase -lasio -lmooncake_common -lxxhash -lyaml-cpp" CGO_LDFLAGS+=" -lstdc++ -lnuma -lglog -lgflags -libverbs -lmlx5 -ljsoncpp -lzstd -lcurl -lm" if [ -d "/usr/local/cuda/lib64" ]; then diff --git a/mooncake-store/include/master_admin_service.h b/mooncake-store/include/master_admin_service.h index 785e3c6e21..1898cb43ef 100644 --- a/mooncake-store/include/master_admin_service.h +++ b/mooncake-store/include/master_admin_service.h @@ -99,10 +99,6 @@ class MasterAdminServer { coro_http::coro_http_response& resp); void HandleDeleteTenantQuota(coro_http::coro_http_request& req, coro_http::coro_http_response& resp); - void HandleGetDefaultTenantQuota(coro_http::coro_http_request& req, - coro_http::coro_http_response& resp); - void HandleSetDefaultTenantQuota(coro_http::coro_http_request& req, - coro_http::coro_http_response& resp); void RegisterHandler(); diff --git a/mooncake-store/include/master_config.h b/mooncake-store/include/master_config.h index 661e4d2b45..7af587a2fe 100644 --- a/mooncake-store/include/master_config.h +++ b/mooncake-store/include/master_config.h @@ -81,9 +81,9 @@ struct MasterConfig { // Storage backend eviction configuration bool enable_disk_eviction; uint64_t quota_bytes; - bool enable_tenant_quota = false; - uint64_t default_tenant_quota_bytes = 0; - uint64_t tenant_quota_pool_capacity_bytes = 0; + bool enable_multi_tenants = false; + std::string tenant_quota_connector_type = "file"; + std::string tenant_quota_connector_uri; bool enable_snapshot_restore; bool enable_snapshot; @@ -175,9 +175,9 @@ class MasterServiceSupervisorConfig { uint64_t put_start_release_timeout_sec = DEFAULT_PUT_START_RELEASE_TIMEOUT; bool enable_disk_eviction = true; uint64_t quota_bytes = 0; - bool enable_tenant_quota = false; - uint64_t default_tenant_quota_bytes = 0; - uint64_t tenant_quota_pool_capacity_bytes = 0; + bool enable_multi_tenants = false; + std::string tenant_quota_connector_type = "file"; + std::string tenant_quota_connector_uri; uint32_t max_total_finished_tasks = DEFAULT_MAX_TOTAL_FINISHED_TASKS; uint32_t max_total_pending_tasks = DEFAULT_MAX_TOTAL_PENDING_TASKS; uint32_t max_total_processing_tasks = DEFAULT_MAX_TOTAL_PROCESSING_TASKS; @@ -275,10 +275,9 @@ class MasterServiceSupervisorConfig { put_start_release_timeout_sec = config.put_start_release_timeout_sec; enable_disk_eviction = config.enable_disk_eviction; quota_bytes = config.quota_bytes; - enable_tenant_quota = config.enable_tenant_quota; - default_tenant_quota_bytes = config.default_tenant_quota_bytes; - tenant_quota_pool_capacity_bytes = - config.tenant_quota_pool_capacity_bytes; + enable_multi_tenants = config.enable_multi_tenants; + tenant_quota_connector_type = config.tenant_quota_connector_type; + tenant_quota_connector_uri = config.tenant_quota_connector_uri; enable_snapshot_restore = config.enable_snapshot_restore; enable_snapshot = config.enable_snapshot; @@ -408,9 +407,9 @@ class WrappedMasterServiceConfig { uint64_t put_start_release_timeout_sec = DEFAULT_PUT_START_RELEASE_TIMEOUT; bool enable_disk_eviction = true; uint64_t quota_bytes = 0; - bool enable_tenant_quota = false; - uint64_t default_tenant_quota_bytes = 0; - uint64_t tenant_quota_pool_capacity_bytes = 0; + bool enable_multi_tenants = false; + std::string tenant_quota_connector_type = "file"; + std::string tenant_quota_connector_uri; bool enable_snapshot_restore = false; bool enable_snapshot = false; @@ -476,10 +475,9 @@ class WrappedMasterServiceConfig { global_file_segment_size = config.global_file_segment_size; enable_disk_eviction = config.enable_disk_eviction; quota_bytes = config.quota_bytes; - enable_tenant_quota = config.enable_tenant_quota; - default_tenant_quota_bytes = config.default_tenant_quota_bytes; - tenant_quota_pool_capacity_bytes = - config.tenant_quota_pool_capacity_bytes; + enable_multi_tenants = config.enable_multi_tenants; + tenant_quota_connector_type = config.tenant_quota_connector_type; + tenant_quota_connector_uri = config.tenant_quota_connector_uri; // Convert string memory_allocator to BufferAllocatorType enum if (config.memory_allocator == "cachelib") { @@ -570,10 +568,9 @@ class WrappedMasterServiceConfig { memory_allocator = config.memory_allocator; enable_disk_eviction = config.enable_disk_eviction; quota_bytes = config.quota_bytes; - enable_tenant_quota = config.enable_tenant_quota; - default_tenant_quota_bytes = config.default_tenant_quota_bytes; - tenant_quota_pool_capacity_bytes = - config.tenant_quota_pool_capacity_bytes; + enable_multi_tenants = config.enable_multi_tenants; + tenant_quota_connector_type = config.tenant_quota_connector_type; + tenant_quota_connector_uri = config.tenant_quota_connector_uri; put_start_discard_timeout_sec = config.put_start_discard_timeout_sec; put_start_release_timeout_sec = config.put_start_release_timeout_sec; @@ -635,9 +632,9 @@ class MasterServiceConfigBuilder { AllocationStrategyType::RANDOM; bool enable_disk_eviction_ = true; uint64_t quota_bytes_ = 0; - bool enable_tenant_quota_ = false; - uint64_t default_tenant_quota_bytes_ = 0; - uint64_t tenant_quota_pool_capacity_bytes_ = 0; + bool enable_multi_tenants_ = false; + std::string tenant_quota_connector_type_ = "file"; + std::string tenant_quota_connector_uri_; uint64_t put_start_discard_timeout_sec_ = DEFAULT_PUT_START_DISCARD_TIMEOUT; uint64_t put_start_release_timeout_sec_ = DEFAULT_PUT_START_RELEASE_TIMEOUT; bool enable_snapshot_restore_ = false; @@ -779,19 +776,20 @@ class MasterServiceConfigBuilder { return *this; } - MasterServiceConfigBuilder& set_enable_tenant_quota(bool enable) { - enable_tenant_quota_ = enable; + MasterServiceConfigBuilder& set_enable_multi_tenants(bool enable) { + enable_multi_tenants_ = enable; return *this; } - MasterServiceConfigBuilder& set_default_tenant_quota_bytes(uint64_t bytes) { - default_tenant_quota_bytes_ = bytes; + MasterServiceConfigBuilder& set_tenant_quota_connector_type( + const std::string& type) { + tenant_quota_connector_type_ = type; return *this; } - MasterServiceConfigBuilder& set_tenant_quota_pool_capacity_bytes( - uint64_t bytes) { - tenant_quota_pool_capacity_bytes_ = bytes; + MasterServiceConfigBuilder& set_tenant_quota_connector_uri( + const std::string& uri) { + tenant_quota_connector_uri_ = uri; return *this; } @@ -980,9 +978,9 @@ class MasterServiceConfig { uint64_t put_start_release_timeout_sec = DEFAULT_PUT_START_RELEASE_TIMEOUT; bool enable_disk_eviction = true; uint64_t quota_bytes = 0; - bool enable_tenant_quota = false; - uint64_t default_tenant_quota_bytes = 0; - uint64_t tenant_quota_pool_capacity_bytes = 0; + bool enable_multi_tenants = false; + std::string tenant_quota_connector_type = "file"; + std::string tenant_quota_connector_uri; bool enable_snapshot_restore = false; bool enable_snapshot = false; @@ -1045,10 +1043,9 @@ class MasterServiceConfig { allocation_strategy_type = config.allocation_strategy_type; enable_disk_eviction = config.enable_disk_eviction; quota_bytes = config.quota_bytes; - enable_tenant_quota = config.enable_tenant_quota; - default_tenant_quota_bytes = config.default_tenant_quota_bytes; - tenant_quota_pool_capacity_bytes = - config.tenant_quota_pool_capacity_bytes; + enable_multi_tenants = config.enable_multi_tenants; + tenant_quota_connector_type = config.tenant_quota_connector_type; + tenant_quota_connector_uri = config.tenant_quota_connector_uri; put_start_discard_timeout_sec = config.put_start_discard_timeout_sec; put_start_release_timeout_sec = config.put_start_release_timeout_sec; @@ -1112,9 +1109,9 @@ inline MasterServiceConfig MasterServiceConfigBuilder::build() const { config.put_start_release_timeout_sec = put_start_release_timeout_sec_; config.enable_disk_eviction = enable_disk_eviction_; config.quota_bytes = quota_bytes_; - config.enable_tenant_quota = enable_tenant_quota_; - config.default_tenant_quota_bytes = default_tenant_quota_bytes_; - config.tenant_quota_pool_capacity_bytes = tenant_quota_pool_capacity_bytes_; + config.enable_multi_tenants = enable_multi_tenants_; + config.tenant_quota_connector_type = tenant_quota_connector_type_; + config.tenant_quota_connector_uri = tenant_quota_connector_uri_; config.enable_snapshot_restore = enable_snapshot_restore_; config.enable_snapshot = enable_snapshot_; config.snapshot_backup_dir = snapshot_backup_dir_; diff --git a/mooncake-store/include/master_service.h b/mooncake-store/include/master_service.h index fa54a19b1e..76b5bdf924 100644 --- a/mooncake-store/include/master_service.h +++ b/mooncake-store/include/master_service.h @@ -28,6 +28,7 @@ #include "mutex.h" #include "segment.h" #include "tenant_quota.h" +#include "tenant_quota_policy_store.h" #include "types.h" #include "master_config.h" #include "rpc_types.h" @@ -66,9 +67,15 @@ class MasterServiceTenantQuotaTest; * @brief MasterService is the main class for the master server. * Lock order: To avoid deadlocks, the following lock order should be followed: * 1. client_mutex_ - * 2. metadata_shards_[shard_idx_].mutex - * 3. tenant_quota_shards_[shard_idx_].mutex - * 4. segment_mutex_ + * 2. tenant_quota_policy_mutex_ + * 3. snapshot_mutex_ + * 4. metadata_shards_[shard_idx_].mutex + * 5. tenant_quota_shards_[shard_idx_].mutex + * 6. segment_mutex_ + * + * Strict tenant admission and policy mutation paths that need both + * tenant_quota_policy_mutex_ and snapshot_mutex_ must acquire the tenant + * policy mutex first, then snapshot_mutex_. */ class MasterService { // Test friend class for snapshot/restore testing @@ -98,10 +105,8 @@ class MasterService { const std::string& tenant_id) const; tl::expected UpsertTenantQuotaPolicy( const std::string& tenant_id, uint64_t requested_quota_bytes); - std::optional DeleteTenantQuotaPolicy( - const std::string& tenant_id); - uint64_t GetDefaultTenantQuotaPolicy() const; - void SetDefaultTenantQuotaPolicy(uint64_t requested_quota_bytes); + tl::expected, ErrorCode> + DeleteTenantQuotaPolicy(const std::string& tenant_id); uint64_t GetTenantQuotaAllocatableCapacityBytes(); /** @@ -352,7 +357,7 @@ class MasterService { */ auto AddReplica(const UUID& client_id, const std::string& key, const std::string& tenant_id, Replica& replica) - -> tl::expected; + -> tl::expected; /** * @brief Revoke a put operation, replica_type indicates the type of @@ -1157,6 +1162,7 @@ class MasterService { } type; ReplicaID source_id; std::vector replica_ids; + uint64_t reserved_quota_charge_bytes{0}; }; struct OffloadingTask { @@ -1335,6 +1341,15 @@ class MasterService { const std::string& tenant_id) { return {NormalizeTenantId(tenant_id), user_key}; } + std::string NormalizeRequestTenantId(const std::string& tenant_id) const; + ObjectIdentity MakeObjectIdentityForRequest( + const std::string& user_key, const std::string& tenant_id) const; + tl::expected NormalizeTenantIdForWrite( + const std::string& tenant_id) const; + tl::expected NormalizeTenantIdForWriteLocked( + const std::string& tenant_id) const; + bool IsTenantRegistered(const std::string& tenant_id) const; + bool TenantHasObjects(const std::string& tenant_id) const; static std::string MakeTenantScopedKey(const std::string& tenant_id, const std::string& key) { @@ -1394,6 +1409,8 @@ class MasterService { uint64_t CompletedMemoryQuotaCharge(const ObjectMetadata& metadata) const; uint64_t RequestedMemoryQuotaCharge(uint64_t value_length, const ReplicateConfig& config) const; + bool ShouldProtectZeroChargeMetadataCreate( + uint64_t requested_quota_charge) const; uint64_t ComputeTenantQuotaDeficit(const std::string& tenant_id, uint64_t incoming_quota_charge); tl::expected ReserveTenantQuota( @@ -1403,9 +1420,18 @@ class MasterService { void ReleaseTenantQuota(const std::string& tenant_id, uint64_t bytes); void ReleaseTenantQuotaPartial(const std::string& tenant_id, uint64_t bytes); + void CommitAdditionalTenantQuota(const std::string& tenant_id, + uint64_t bytes); + void AbortReplicationTaskQuota(const std::string& tenant_id, + const ReplicationTask& task); + void IncrementTenantMetadataObjectCount(const std::string& tenant_id); + void DecrementTenantMetadataObjectCount(const std::string& tenant_id); void ReleaseCommittedQuotaCharge(ObjectMetadata& metadata, uint64_t bytes); void RecomputeTenantEffectiveQuotas(); void RebuildTenantQuotaUsageFromMetadata(); + void LoadTenantQuotaPoliciesFromStoreOrThrow(); + void ApplyTenantQuotaPolicies(const TenantQuotaPolicySnapshot& snapshot); + TenantQuotaPolicySnapshot BuildTenantQuotaPolicySnapshot() const; uint64_t GetTenantQuotaCapacityBytes(); std::unordered_map::iterator EraseMetadata( TenantState& tenant_state, @@ -1674,6 +1700,10 @@ class MasterService { enable_soft_pin, enable_hard_pin, data_type, group_id, object_id_.tenant_id, object_id_.user_key)); it_ = result.first; + if (result.second) { + service_->IncrementTenantMetadataObjectCount( + object_id_.tenant_id); + } } private: @@ -1756,20 +1786,6 @@ class MasterService { const msgpack::object& obj); }; - class TenantQuotaPolicySerializer { - public: - TenantQuotaPolicySerializer(MasterService* service) - : service_(service) {} - - tl::expected, SerializationError> Serialize(); - tl::expected Deserialize( - const std::vector& data); - void Reset(); - - private: - MasterService* service_; - }; - friend class MetadataAccessor; class MetadataAccessorRO { public: @@ -1923,13 +1939,11 @@ class MasterService { // storage backend eviction configuration const bool enable_disk_eviction_; const uint64_t quota_bytes_; - const bool enable_tenant_quota_; - // Startup default used when restoring legacy snapshots without - // tenant_quota_policy. - const uint64_t configured_default_tenant_quota_bytes_; - // Runtime default policy, mutable through the tenant quota admin API. - std::atomic default_tenant_quota_bytes_; - const uint64_t tenant_quota_pool_capacity_bytes_; + const bool enable_multi_tenants_; + const std::string tenant_quota_connector_type_; + const std::string tenant_quota_connector_uri_; + std::unique_ptr tenant_quota_policy_store_; + mutable std::mutex tenant_quota_policy_mutex_; mutable std::mutex tenant_quota_recompute_mutex_; // HTTP metadata server pointer for cleanup on client timeout diff --git a/mooncake-store/include/rpc_service.h b/mooncake-store/include/rpc_service.h index 023d65e352..bf93f386f4 100644 --- a/mooncake-store/include/rpc_service.h +++ b/mooncake-store/include/rpc_service.h @@ -185,9 +185,6 @@ class WrappedMasterService { const std::string& tenant_id, uint64_t requested_quota_bytes); tl::expected, ErrorCode> DeleteTenantQuotaPolicy(const std::string& tenant_id); - tl::expected GetDefaultTenantQuotaPolicy(); - tl::expected SetDefaultTenantQuotaPolicy( - uint64_t requested_quota_bytes); tl::expected GetTenantQuotaAllocatableCapacityBytes(); tl::expected, ErrorCode> GetAllKeysForAdmin(); diff --git a/mooncake-store/include/tenant_quota.h b/mooncake-store/include/tenant_quota.h index ef5c1e0fa0..bd189db2bb 100644 --- a/mooncake-store/include/tenant_quota.h +++ b/mooncake-store/include/tenant_quota.h @@ -16,6 +16,7 @@ struct TenantQuotaState { uint64_t used_bytes = 0; uint64_t reserved_bytes = 0; uint64_t committed_count = 0; + uint64_t metadata_object_count = 0; bool has_explicit_policy = false; bool over_quota = false; }; @@ -27,6 +28,7 @@ struct TenantQuotaSnapshot { uint64_t used_bytes = 0; uint64_t reserved_bytes = 0; uint64_t committed_count = 0; + uint64_t metadata_object_count = 0; bool has_explicit_policy = false; bool over_quota = false; }; @@ -46,14 +48,10 @@ using TenantQuotaResult = tl::expected; std::vector BuildEffectiveQuotaAssignments( const std::map& tenants, - uint64_t default_requested_quota_bytes, uint64_t allocatable_capacity_bytes); class TenantQuotaTable { public: - void SetDefaultRequestedQuota(uint64_t bytes); - uint64_t GetDefaultRequestedQuota() const; - TenantQuotaResult UpsertTenantPolicy(std::string tenant_id, uint64_t requested_quota_bytes); void EraseTenantPolicy(std::string tenant_id); @@ -76,7 +74,6 @@ class TenantQuotaTable { const TenantQuotaState& state) const; void RefreshOverQuota(TenantQuotaState* state) const; - uint64_t default_requested_quota_bytes_ = 0; std::map tenants_; }; diff --git a/mooncake-store/include/tenant_quota_policy_store.h b/mooncake-store/include/tenant_quota_policy_store.h new file mode 100644 index 0000000000..30f3d85d41 --- /dev/null +++ b/mooncake-store/include/tenant_quota_policy_store.h @@ -0,0 +1,51 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace mooncake { + +struct TenantQuotaPolicySnapshot { + std::map tenant_quotas; +}; + +tl::expected ParseTenantQuotaBytes( + const std::string& value); + +tl::expected ParseTenantQuotaPolicyYaml( + const std::string& yaml); + +std::string FormatTenantQuotaPolicyYaml( + const TenantQuotaPolicySnapshot& snapshot); + +class TenantQuotaPolicyStore { + public: + virtual ~TenantQuotaPolicyStore() = default; + + virtual tl::expected Load() = 0; + virtual tl::expected Save( + const TenantQuotaPolicySnapshot& snapshot) = 0; +}; + +class YamlTenantQuotaPolicyStore final : public TenantQuotaPolicyStore { + public: + explicit YamlTenantQuotaPolicyStore(std::string path); + + tl::expected Load() override; + tl::expected Save( + const TenantQuotaPolicySnapshot& snapshot) override; + + private: + std::string path_; + std::mutex mutex_; +}; + +tl::expected, std::string> +CreateTenantQuotaPolicyStore(const std::string& type, const std::string& uri); + +} // namespace mooncake diff --git a/mooncake-store/include/types.h b/mooncake-store/include/types.h index e2ef20afbc..0532e11801 100644 --- a/mooncake-store/include/types.h +++ b/mooncake-store/include/types.h @@ -227,6 +227,18 @@ inline std::string NormalizeTenantId(const std::string& tenant_id) { return tenant_id.empty() ? "default" : tenant_id; } +inline bool IsValidTenantId(const std::string& tenant_id) { + if (tenant_id.empty() || tenant_id.front() == '_') { + return false; + } + for (unsigned char c : tenant_id) { + if (c < 0x20 || c == 0x7f) { + return false; + } + } + return true; +} + inline std::string MakeTenantScopedStorageKey(const std::string& tenant_id, const std::string& key) { const auto normalized_tenant = NormalizeTenantId(tenant_id); @@ -408,6 +420,8 @@ enum class ErrorCode : int32_t { DFS_STALE_HANDLE = -1604, ///< DFS file handle expired. DFS_PARTIAL_WRITE = -1605, ///< DFS partial write success. TENANT_QUOTA_EXCEEDED = -1700, ///< Tenant memory quota exceeded. + TENANT_NOT_REGISTERED = -1701, ///< Tenant has no quota policy. + TENANT_NOT_EMPTY = -1702, ///< Tenant still owns objects or quota. }; int32_t toInt(ErrorCode errorCode) noexcept; diff --git a/mooncake-store/rust/build.rs b/mooncake-store/rust/build.rs index 5622d9550a..abcd95b06c 100644 --- a/mooncake-store/rust/build.rs +++ b/mooncake-store/rust/build.rs @@ -234,6 +234,7 @@ fn main() { println!("cargo:rustc-link-lib=numa"); // NUMA binding println!("cargo:rustc-link-lib=curl"); // HTTP metadata plugin println!("cargo:rustc-link-lib=ibverbs"); // RDMA transport + println!("cargo:rustc-link-lib=yaml-cpp"); // tenant quota policy connector println!("cargo:rustc-link-lib=pthread"); println!("cargo:rustc-link-lib=xxhash"); @@ -339,6 +340,7 @@ fn main() { "numa", "ibverbs", "jsoncpp", + "yaml-cpp", "zstd", "m", ] { diff --git a/mooncake-store/src/CMakeLists.txt b/mooncake-store/src/CMakeLists.txt index e2c2830c38..85ca4c5f1c 100644 --- a/mooncake-store/src/CMakeLists.txt +++ b/mooncake-store/src/CMakeLists.txt @@ -17,6 +17,7 @@ set(MOONCAKE_STORE_SOURCES segment.cpp transfer_task.cpp tenant_quota.cpp + tenant_quota_policy_store.cpp rpc_service.cpp master_admin_service.cpp offset_allocator.cpp @@ -258,6 +259,7 @@ target_link_libraries( gflags::gflags ${EXTRA_LIBS} ${SPDK_STATIC_LIBS} + yaml-cpp asio_shared PRIVATE transfer_engine) if(STORE_USE_ETCD) diff --git a/mooncake-store/src/master.cpp b/mooncake-store/src/master.cpp index 8b16dc07a4..ed4eb998b2 100644 --- a/mooncake-store/src/master.cpp +++ b/mooncake-store/src/master.cpp @@ -262,14 +262,12 @@ DEFINE_bool(enable_disk_eviction, true, DEFINE_uint64( quota_bytes, 0, "Quota for storage backend in bytes (0 = use default 90% of capacity)"); -DEFINE_bool(enable_tenant_quota, false, - "Enable per-tenant memory quota admission"); -DEFINE_uint64(default_tenant_quota_bytes, 0, - "Default requested per-tenant memory quota in bytes " - "(0 is allowed; inherited tenants share remaining capacity)"); -DEFINE_uint64(tenant_quota_pool_capacity_bytes, 0, - "Capacity used to compute effective tenant quotas " - "(0 = mounted memory capacity)"); +DEFINE_bool(enable_multi_tenants, false, + "Enable strict multi-tenant namespace and quota admission"); +DEFINE_string(tenant_quota_connector_type, "file", + "Tenant quota policy connector type"); +DEFINE_string(tenant_quota_connector_uri, "", + "Tenant quota policy connector URI"); // Snapshot related configuration flags (migrated from global_flags) DEFINE_string(snapshot_backup_dir, "", @@ -486,15 +484,15 @@ void InitMasterConf(const mooncake::DefaultConfig& default_config, FLAGS_enable_disk_eviction); default_config.GetUInt64("quota_bytes", &master_config.quota_bytes, FLAGS_quota_bytes); - default_config.GetBool("enable_tenant_quota", - &master_config.enable_tenant_quota, - FLAGS_enable_tenant_quota); - default_config.GetUInt64("default_tenant_quota_bytes", - &master_config.default_tenant_quota_bytes, - FLAGS_default_tenant_quota_bytes); - default_config.GetUInt64("tenant_quota_pool_capacity_bytes", - &master_config.tenant_quota_pool_capacity_bytes, - FLAGS_tenant_quota_pool_capacity_bytes); + default_config.GetBool("enable_multi_tenants", + &master_config.enable_multi_tenants, + FLAGS_enable_multi_tenants); + default_config.GetString("tenant_quota_connector_type", + &master_config.tenant_quota_connector_type, + FLAGS_tenant_quota_connector_type); + default_config.GetString("tenant_quota_connector_uri", + &master_config.tenant_quota_connector_uri, + FLAGS_tenant_quota_connector_uri); default_config.GetString("snapshot_backup_dir", &master_config.snapshot_backup_dir, @@ -878,23 +876,22 @@ void LoadConfigFromCmdline(mooncake::MasterConfig& master_config, !conf_set) { master_config.quota_bytes = FLAGS_quota_bytes; } - if ((google::GetCommandLineFlagInfo("enable_tenant_quota", &info) && + if ((google::GetCommandLineFlagInfo("enable_multi_tenants", &info) && !info.is_default) || !conf_set) { - master_config.enable_tenant_quota = FLAGS_enable_tenant_quota; + master_config.enable_multi_tenants = FLAGS_enable_multi_tenants; } - if ((google::GetCommandLineFlagInfo("default_tenant_quota_bytes", &info) && + if ((google::GetCommandLineFlagInfo("tenant_quota_connector_type", &info) && !info.is_default) || !conf_set) { - master_config.default_tenant_quota_bytes = - FLAGS_default_tenant_quota_bytes; + master_config.tenant_quota_connector_type = + FLAGS_tenant_quota_connector_type; } - if ((google::GetCommandLineFlagInfo("tenant_quota_pool_capacity_bytes", - &info) && + if ((google::GetCommandLineFlagInfo("tenant_quota_connector_uri", &info) && !info.is_default) || !conf_set) { - master_config.tenant_quota_pool_capacity_bytes = - FLAGS_tenant_quota_pool_capacity_bytes; + master_config.tenant_quota_connector_uri = + FLAGS_tenant_quota_connector_uri; } if ((google::GetCommandLineFlagInfo("max_total_finished_tasks", &info) && !info.is_default) || diff --git a/mooncake-store/src/master_admin_service.cpp b/mooncake-store/src/master_admin_service.cpp index b282b4b928..0898dadb8b 100644 --- a/mooncake-store/src/master_admin_service.cpp +++ b/mooncake-store/src/master_admin_service.cpp @@ -72,9 +72,11 @@ coro_http::status_type ErrorCodeToHttpStatus(ErrorCode error) { case ErrorCode::JOB_NOT_FOUND: case ErrorCode::SEGMENT_NOT_FOUND: case ErrorCode::OBJECT_NOT_FOUND: + case ErrorCode::TENANT_NOT_REGISTERED: return coro_http::status_type::not_found; case ErrorCode::UNAVAILABLE_IN_CURRENT_MODE: case ErrorCode::UNAVAILABLE_IN_CURRENT_STATUS: + case ErrorCode::TENANT_NOT_EMPTY: return coro_http::status_type::conflict; default: return coro_http::status_type::internal_server_error; @@ -176,12 +178,13 @@ struct HttpTenantQuotaSnapshot { uint64_t used_bytes{0}; uint64_t reserved_bytes{0}; uint64_t committed_count{0}; + uint64_t metadata_object_count{0}; bool over_quota{false}; bool has_explicit_policy{false}; }; YLT_REFL(HttpTenantQuotaSnapshot, tenant_id, requested_quota_bytes, effective_quota_bytes, used_bytes, reserved_bytes, committed_count, - over_quota, has_explicit_policy); + metadata_object_count, over_quota, has_explicit_policy); HttpTenantQuotaSnapshot ToHttpTenantQuotaSnapshot( const TenantQuotaSnapshot& snapshot) { @@ -192,6 +195,7 @@ HttpTenantQuotaSnapshot ToHttpTenantQuotaSnapshot( .used_bytes = snapshot.used_bytes, .reserved_bytes = snapshot.reserved_bytes, .committed_count = snapshot.committed_count, + .metadata_object_count = snapshot.metadata_object_count, .over_quota = snapshot.over_quota, .has_explicit_policy = snapshot.has_explicit_policy, }; @@ -220,12 +224,6 @@ struct HttpTenantQuotaPolicyRequest { }; YLT_REFL(HttpTenantQuotaPolicyRequest, requested_quota_bytes); -struct HttpDefaultTenantQuotaResponse { - bool success{true}; - uint64_t requested_quota_bytes{0}; -}; -YLT_REFL(HttpDefaultTenantQuotaResponse, success, requested_quota_bytes); - tl::expected ParseAdminTenantId( coro_http::coro_http_request& req) { auto tenant_id_view = req.get_decode_query_value("tenant_id"); @@ -233,7 +231,7 @@ tl::expected ParseAdminTenantId( return tl::make_unexpected(ErrorCode::INVALID_PARAMS); } std::string tenant_id = NormalizeTenantId(std::string(tenant_id_view)); - if (tenant_id.empty() || tenant_id.front() == '_') { + if (!IsValidTenantId(tenant_id)) { return tl::make_unexpected(ErrorCode::INVALID_PARAMS); } return tenant_id; @@ -402,6 +400,9 @@ std::string MasterAdminServer::BuildTenantQuotaMetricsText() const { << "# HELP mooncake_tenant_quota_committed_count Tenant committed " "object count\n" << "# TYPE mooncake_tenant_quota_committed_count gauge\n" + << "# HELP mooncake_tenant_quota_metadata_object_count Tenant " + "metadata object count\n" + << "# TYPE mooncake_tenant_quota_metadata_object_count gauge\n" << "# HELP mooncake_tenant_quota_over_quota Tenant over-quota flag\n" << "# TYPE mooncake_tenant_quota_over_quota gauge\n" << "# HELP mooncake_tenant_quota_explicit_policy Tenant explicit " @@ -423,6 +424,9 @@ std::string MasterAdminServer::BuildTenantQuotaMetricsText() const { << tenant << "\"} " << snapshot.reserved_bytes << "\n"; tenant_metrics << "mooncake_tenant_quota_committed_count{tenant_id=\"" << tenant << "\"} " << snapshot.committed_count << "\n"; + tenant_metrics + << "mooncake_tenant_quota_metadata_object_count{tenant_id=\"" + << tenant << "\"} " << snapshot.metadata_object_count << "\n"; tenant_metrics << "mooncake_tenant_quota_over_quota{tenant_id=\"" << tenant << "\"} " << (snapshot.over_quota ? 1 : 0) << "\n"; @@ -1092,45 +1096,6 @@ void MasterAdminServer::HandleDeleteTenantQuota( }); } -void MasterAdminServer::HandleGetDefaultTenantQuota( - coro_http::coro_http_request&, coro_http::coro_http_response& resp) { - WithActiveService(resp, [&](auto service) { - auto result = service->GetDefaultTenantQuotaPolicy(); - if (!result.has_value()) { - WriteErrorResponse(resp, ErrorCodeToHttpStatus(result.error()), - result.error()); - return; - } - WriteJsonResponse(resp, coro_http::status_type::ok, - HttpDefaultTenantQuotaResponse{ - .requested_quota_bytes = result.value()}); - }); -} - -void MasterAdminServer::HandleSetDefaultTenantQuota( - coro_http::coro_http_request& req, coro_http::coro_http_response& resp) { - auto body_result = ParseQuotaPolicyBody(req); - if (!body_result.has_value()) { - WriteErrorResponse(resp, coro_http::status_type::bad_request, - ErrorCode::INVALID_PARAMS, body_result.error()); - return; - } - - WithActiveService(resp, [&](auto service) { - auto result = service->SetDefaultTenantQuotaPolicy( - body_result->requested_quota_bytes); - if (!result.has_value()) { - WriteErrorResponse(resp, ErrorCodeToHttpStatus(result.error()), - result.error()); - return; - } - WriteJsonResponse( - resp, coro_http::status_type::ok, - HttpDefaultTenantQuotaResponse{ - .requested_quota_bytes = body_result->requested_quota_bytes}); - }); -} - void MasterAdminServer::RegisterHandler() { using namespace coro_http; @@ -1219,16 +1184,6 @@ void MasterAdminServer::RegisterHandler() { [this](coro_http_request& req, coro_http_response& resp) { HandleDeleteTenantQuota(req, resp); }); - http_server_.set_http_handler( - "/api/v1/tenant_quotas/default", - [this](coro_http_request& req, coro_http_response& resp) { - HandleGetDefaultTenantQuota(req, resp); - }); - http_server_.set_http_handler( - "/api/v1/tenant_quotas/default", - [this](coro_http_request& req, coro_http_response& resp) { - HandleSetDefaultTenantQuota(req, resp); - }); http_server_.set_http_handler( "/batch_query_keys", [this](coro_http_request& req, coro_http_response& resp) { diff --git a/mooncake-store/src/master_service.cpp b/mooncake-store/src/master_service.cpp index 4b8dcef210..9044014f22 100644 --- a/mooncake-store/src/master_service.cpp +++ b/mooncake-store/src/master_service.cpp @@ -1,6 +1,6 @@ #include "master_service.h" -#include +#include #include #include #include @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -47,8 +48,6 @@ namespace mooncake { static const std::string SNAPSHOT_METADATA_FILE = "metadata"; static const std::string SNAPSHOT_SEGMENTS_FILE = "segments"; static const std::string SNAPSHOT_TASK_MANAGER_FILE = "task_manager"; -static const std::string SNAPSHOT_TENANT_QUOTA_POLICY_FILE = - "tenant_quota_policy"; static const std::string SNAPSHOT_MANIFEST_FILE = "manifest.txt"; static const std::string SNAPSHOT_LATEST_FILE = "latest.txt"; static const std::string SNAPSHOT_BACKUP_SAVE_DIR = @@ -100,6 +99,20 @@ size_t RandomIndex(size_t upper_bound) { return dist(generator); } +uint64_t SaturatingAdd(uint64_t lhs, uint64_t rhs) { + if (lhs > std::numeric_limits::max() - rhs) { + return std::numeric_limits::max(); + } + return lhs + rhs; +} + +uint64_t SaturatingMultiply(uint64_t lhs, uint64_t rhs) { + if (lhs != 0 && rhs > std::numeric_limits::max() / lhs) { + return std::numeric_limits::max(); + } + return lhs * rhs; +} + bool HasExpectedReplicaAllocation(const ReplicateConfig& config, size_t allocated_memory_replicas, size_t allocated_nof_replicas) { @@ -116,13 +129,16 @@ bool HasExpectedReplicaAllocation(const ReplicateConfig& config, bool IsLazyEmptyTenantQuotaState(const TenantQuotaState& state) { return !state.has_explicit_policy && state.used_bytes == 0 && - state.reserved_bytes == 0 && state.committed_count == 0; + state.reserved_bytes == 0 && state.committed_count == 0 && + state.metadata_object_count == 0; } void RefreshTenantQuotaOverQuota(TenantQuotaState& state) { - state.over_quota = static_cast(state.used_bytes) + - state.reserved_bytes > - state.effective_quota_bytes; + state.over_quota = + (!state.has_explicit_policy && state.metadata_object_count > 0) || + static_cast(state.used_bytes) + + state.reserved_bytes > + state.effective_quota_bytes; } TenantQuotaSnapshot MakeTenantQuotaSnapshot(const std::string& tenant_id, @@ -134,6 +150,7 @@ TenantQuotaSnapshot MakeTenantQuotaSnapshot(const std::string& tenant_id, .used_bytes = state.used_bytes, .reserved_bytes = state.reserved_bytes, .committed_count = state.committed_count, + .metadata_object_count = state.metadata_object_count, .has_explicit_policy = state.has_explicit_policy, .over_quota = state.over_quota}; } @@ -193,11 +210,9 @@ MasterService::MasterService(const MasterServiceConfig& config) global_file_segment_size_(config.global_file_segment_size), enable_disk_eviction_(config.enable_disk_eviction), quota_bytes_(config.quota_bytes), - enable_tenant_quota_(config.enable_tenant_quota), - configured_default_tenant_quota_bytes_(config.default_tenant_quota_bytes), - default_tenant_quota_bytes_(config.default_tenant_quota_bytes), - tenant_quota_pool_capacity_bytes_( - config.tenant_quota_pool_capacity_bytes), + enable_multi_tenants_(config.enable_multi_tenants), + tenant_quota_connector_type_(config.tenant_quota_connector_type), + tenant_quota_connector_uri_(config.tenant_quota_connector_uri), segment_manager_(config.memory_allocator, config.enable_cxl), nof_segment_manager_(config.memory_allocator), memory_allocator_type_(config.memory_allocator), @@ -248,9 +263,22 @@ MasterService::MasterService(const MasterServiceConfig& config) } } + if (enable_multi_tenants_) { + auto store = CreateTenantQuotaPolicyStore(tenant_quota_connector_type_, + tenant_quota_connector_uri_); + if (!store) { + throw std::invalid_argument(store.error()); + } + tenant_quota_policy_store_ = std::move(store.value()); + } + if (enable_snapshot_restore_) { RestoreState(); } + if (enable_multi_tenants_) { + LoadTenantQuotaPoliciesFromStoreOrThrow(); + RebuildTenantQuotaUsageFromMetadata(); + } if (enable_snapshot_ && snapshot_retention_count_ == 0) { LOG(ERROR) << "snapshot_retention_count must be greater than 0"; throw std::invalid_argument("snapshot_retention_count must be > 0"); @@ -548,7 +576,7 @@ MasterService::GetTenantQuotaSnapshotForTesting( } bool MasterService::IsTenantQuotaEnabled() const { - return enable_tenant_quota_; + return enable_multi_tenants_; } std::vector MasterService::ListTenantQuotaSnapshots() @@ -579,81 +607,114 @@ std::optional MasterService::GetTenantQuotaSnapshot( tl::expected MasterService::UpsertTenantQuotaPolicy(const std::string& tenant_id, uint64_t requested_quota_bytes) { - if (!enable_tenant_quota_) { + if (!enable_multi_tenants_) { return tl::make_unexpected(ErrorCode::UNAVAILABLE_IN_CURRENT_MODE); } if (requested_quota_bytes == 0) { return tl::make_unexpected(ErrorCode::INVALID_PARAMS); } + if (tenant_id.empty()) { + return tl::make_unexpected(ErrorCode::INVALID_PARAMS); + } const auto normalized_tenant = NormalizeTenantId(tenant_id); - auto& shard = - tenant_quota_shards_[getTenantQuotaShardIndex(normalized_tenant)]; - { - std::lock_guard lock(shard.mutex); - auto& state = shard.tenants[normalized_tenant]; - state.requested_quota_bytes = requested_quota_bytes; - state.has_explicit_policy = true; + if (!IsValidTenantId(normalized_tenant)) { + return tl::make_unexpected(ErrorCode::INVALID_PARAMS); } - RecomputeTenantEffectiveQuotas(); - auto snapshot = GetTenantQuotaSnapshot(normalized_tenant); - if (!snapshot.has_value()) { + std::lock_guard policy_lock(tenant_quota_policy_mutex_); + auto policy = BuildTenantQuotaPolicySnapshot(); + policy.tenant_quotas[normalized_tenant] = requested_quota_bytes; + auto save_result = tenant_quota_policy_store_->Save(policy); + if (!save_result) { + LOG(ERROR) << "failed to save tenant quota policy: " + << save_result.error(); + return tl::make_unexpected(ErrorCode::PERSISTENT_FAIL); + } + ApplyTenantQuotaPolicies(policy); + auto result_snapshot = GetTenantQuotaSnapshot(normalized_tenant); + if (!result_snapshot.has_value()) { return tl::make_unexpected(ErrorCode::INTERNAL_ERROR); } - return snapshot.value(); + return result_snapshot.value(); } -std::optional MasterService::DeleteTenantQuotaPolicy( - const std::string& tenant_id) { - if (!enable_tenant_quota_) { - return std::nullopt; +tl::expected, ErrorCode> +MasterService::DeleteTenantQuotaPolicy(const std::string& tenant_id) { + if (!enable_multi_tenants_) { + return tl::make_unexpected(ErrorCode::UNAVAILABLE_IN_CURRENT_MODE); } + if (tenant_id.empty()) { + return tl::make_unexpected(ErrorCode::INVALID_PARAMS); + } const auto normalized_tenant = NormalizeTenantId(tenant_id); - auto& shard = - tenant_quota_shards_[getTenantQuotaShardIndex(normalized_tenant)]; + if (!IsValidTenantId(normalized_tenant)) { + return tl::make_unexpected(ErrorCode::INVALID_PARAMS); + } + + std::lock_guard policy_lock(tenant_quota_policy_mutex_); + auto policy = BuildTenantQuotaPolicySnapshot(); + auto policy_it = policy.tenant_quotas.find(normalized_tenant); + if (policy_it == policy.tenant_quotas.end()) { + return tl::make_unexpected(ErrorCode::OBJECT_NOT_FOUND); + } + const uint64_t requested_quota_bytes = policy_it->second; + + auto restore_policy = [&] { + auto& shard = + tenant_quota_shards_[getTenantQuotaShardIndex(normalized_tenant)]; + { + std::lock_guard lock(shard.mutex); + auto& state = shard.tenants[normalized_tenant]; + state.requested_quota_bytes = requested_quota_bytes; + state.has_explicit_policy = true; + RefreshTenantQuotaOverQuota(state); + } + RecomputeTenantEffectiveQuotas(); + }; + { + auto& shard = + tenant_quota_shards_[getTenantQuotaShardIndex(normalized_tenant)]; std::lock_guard lock(shard.mutex); - auto it = shard.tenants.find(normalized_tenant); - if (it == shard.tenants.end()) { - return std::nullopt; + auto quota_it = shard.tenants.find(normalized_tenant); + if (quota_it == shard.tenants.end() || + !quota_it->second.has_explicit_policy) { + return tl::make_unexpected(ErrorCode::OBJECT_NOT_FOUND); } - - auto& state = it->second; - state.has_explicit_policy = false; - state.requested_quota_bytes = - default_tenant_quota_bytes_.load(std::memory_order_relaxed); - if (IsLazyEmptyTenantQuotaState(state)) { - shard.tenants.erase(it); + auto& state = quota_it->second; + if (state.used_bytes != 0 || state.reserved_bytes != 0 || + state.committed_count != 0 || state.metadata_object_count != 0) { + return tl::make_unexpected(ErrorCode::TENANT_NOT_EMPTY); } + state.has_explicit_policy = false; + state.requested_quota_bytes = 0; + state.effective_quota_bytes = 0; + RefreshTenantQuotaOverQuota(state); } - RecomputeTenantEffectiveQuotas(); - return GetTenantQuotaSnapshot(normalized_tenant); -} - -uint64_t MasterService::GetDefaultTenantQuotaPolicy() const { - return default_tenant_quota_bytes_.load(std::memory_order_relaxed); -} - -void MasterService::SetDefaultTenantQuotaPolicy( - uint64_t requested_quota_bytes) { - if (!enable_tenant_quota_) { - return; + auto post_mark_snapshot = GetTenantQuotaSnapshot(normalized_tenant); + if (TenantHasObjects(normalized_tenant) || + (post_mark_snapshot.has_value() && + (post_mark_snapshot->used_bytes != 0 || + post_mark_snapshot->reserved_bytes != 0 || + post_mark_snapshot->committed_count != 0 || + post_mark_snapshot->metadata_object_count != 0))) { + restore_policy(); + return tl::make_unexpected(ErrorCode::TENANT_NOT_EMPTY); } - default_tenant_quota_bytes_.store(requested_quota_bytes, - std::memory_order_relaxed); - for (size_t i = 0; i < kNumTenantQuotaShards; ++i) { - auto& shard = tenant_quota_shards_[i]; - std::lock_guard lock(shard.mutex); - for (auto& [_, state] : shard.tenants) { - if (!state.has_explicit_policy) { - state.requested_quota_bytes = requested_quota_bytes; - } - } + + policy.tenant_quotas.erase(policy_it); + auto save_result = tenant_quota_policy_store_->Save(policy); + if (!save_result) { + restore_policy(); + LOG(ERROR) << "failed to save tenant quota policy: " + << save_result.error(); + return tl::make_unexpected(ErrorCode::PERSISTENT_FAIL); } - RecomputeTenantEffectiveQuotas(); + ApplyTenantQuotaPolicies(policy); + return GetTenantQuotaSnapshot(normalized_tenant); } auto MasterService::MountSegment(const Segment& segment, const UUID& client_id) @@ -822,6 +883,154 @@ size_t MasterService::getTenantQuotaShardIndex( kNumTenantQuotaShards; } +std::string MasterService::NormalizeRequestTenantId( + const std::string& tenant_id) const { + if (!enable_multi_tenants_) { + return "default"; + } + return NormalizeTenantId(tenant_id); +} + +MasterService::ObjectIdentity MasterService::MakeObjectIdentityForRequest( + const std::string& user_key, const std::string& tenant_id) const { + return {NormalizeRequestTenantId(tenant_id), user_key}; +} + +bool MasterService::IsTenantRegistered(const std::string& tenant_id) const { + if (!enable_multi_tenants_) { + return true; + } + const auto normalized_tenant = NormalizeTenantId(tenant_id); + const auto& shard = + tenant_quota_shards_[getTenantQuotaShardIndex(normalized_tenant)]; + std::lock_guard lock(shard.mutex); + auto it = shard.tenants.find(normalized_tenant); + return it != shard.tenants.end() && it->second.has_explicit_policy; +} + +tl::expected MasterService::NormalizeTenantIdForWrite( + const std::string& tenant_id) const { + if (!enable_multi_tenants_) { + return std::string("default"); + } + std::lock_guard policy_lock(tenant_quota_policy_mutex_); + return NormalizeTenantIdForWriteLocked(tenant_id); +} + +tl::expected +MasterService::NormalizeTenantIdForWriteLocked( + const std::string& tenant_id) const { + if (!enable_multi_tenants_) { + return std::string("default"); + } + if (tenant_id.empty()) { + return tl::make_unexpected(ErrorCode::TENANT_NOT_REGISTERED); + } + const auto normalized_tenant = NormalizeTenantId(tenant_id); + if (!IsValidTenantId(normalized_tenant)) { + return tl::make_unexpected(ErrorCode::TENANT_NOT_REGISTERED); + } + if (!IsTenantRegistered(normalized_tenant)) { + return tl::make_unexpected(ErrorCode::TENANT_NOT_REGISTERED); + } + return normalized_tenant; +} + +bool MasterService::TenantHasObjects(const std::string& tenant_id) const { + const auto normalized_tenant = NormalizeTenantId(tenant_id); + for (size_t i = 0; i < kNumShards; ++i) { + MetadataShardAccessorRO shard(this, i); + auto tenant_it = shard->tenants.find(normalized_tenant); + if (tenant_it != shard->tenants.end() && + !tenant_it->second.metadata.empty()) { + return true; + } + } + return false; +} + +TenantQuotaPolicySnapshot MasterService::BuildTenantQuotaPolicySnapshot() + const { + TenantQuotaPolicySnapshot snapshot; + for (size_t i = 0; i < kNumTenantQuotaShards; ++i) { + const auto& shard = tenant_quota_shards_[i]; + std::lock_guard lock(shard.mutex); + for (const auto& [tenant_id, state] : shard.tenants) { + if (state.has_explicit_policy) { + snapshot.tenant_quotas[tenant_id] = state.requested_quota_bytes; + } + } + } + return snapshot; +} + +void MasterService::ApplyTenantQuotaPolicies( + const TenantQuotaPolicySnapshot& snapshot) { + std::array>, + kNumTenantQuotaShards> + grouped_quotas; + for (const auto& [tenant_id, requested_quota_bytes] : + snapshot.tenant_quotas) { + grouped_quotas[getTenantQuotaShardIndex(tenant_id)].emplace_back( + tenant_id, requested_quota_bytes); + } + + for (size_t i = 0; i < kNumTenantQuotaShards; ++i) { + auto& shard = tenant_quota_shards_[i]; + std::lock_guard lock(shard.mutex); + for (auto it = shard.tenants.begin(); it != shard.tenants.end();) { + const auto policy_it = snapshot.tenant_quotas.find(it->first); + auto& state = it->second; + if (policy_it != snapshot.tenant_quotas.end()) { + state.requested_quota_bytes = policy_it->second; + state.has_explicit_policy = true; + RefreshTenantQuotaOverQuota(state); + ++it; + } else { + state.has_explicit_policy = false; + state.requested_quota_bytes = 0; + state.effective_quota_bytes = 0; + if (IsLazyEmptyTenantQuotaState(state)) { + it = shard.tenants.erase(it); + } else { + RefreshTenantQuotaOverQuota(state); + ++it; + } + } + } + + for (const auto& [tenant_id, requested_quota_bytes] : + grouped_quotas[i]) { + auto [tenant_it, inserted] = shard.tenants.try_emplace(tenant_id); + if (!inserted) { + continue; + } + auto& state = tenant_it->second; + state.requested_quota_bytes = requested_quota_bytes; + state.has_explicit_policy = true; + RefreshTenantQuotaOverQuota(state); + } + } + RecomputeTenantEffectiveQuotas(); +} + +void MasterService::LoadTenantQuotaPoliciesFromStoreOrThrow() { + if (!enable_multi_tenants_) { + return; + } + if (!tenant_quota_policy_store_) { + throw std::runtime_error( + "tenant quota policy store is not initialized"); + } + std::lock_guard policy_lock(tenant_quota_policy_mutex_); + auto snapshot = tenant_quota_policy_store_->Load(); + if (!snapshot) { + throw std::runtime_error("failed to load tenant quota policy: " + + snapshot.error()); + } + ApplyTenantQuotaPolicies(snapshot.value()); +} + uint64_t MasterService::CompletedMemoryQuotaCharge( const ObjectMetadata& metadata) const { return static_cast(metadata.size) * @@ -840,6 +1049,11 @@ uint64_t MasterService::RequestedMemoryQuotaCharge( return static_cast(charge); } +bool MasterService::ShouldProtectZeroChargeMetadataCreate( + uint64_t requested_quota_charge) const { + return enable_multi_tenants_ && requested_quota_charge == 0; +} + uint64_t MasterService::ComputeTenantQuotaDeficit( const std::string& tenant_id, uint64_t incoming_quota_charge) { uint64_t deficit = incoming_quota_charge; @@ -883,20 +1097,14 @@ uint64_t MasterService::GetTenantQuotaCapacityBytes() { } uint64_t MasterService::GetTenantQuotaAllocatableCapacityBytes() { - const uint64_t total_registered_memory = GetTenantQuotaCapacityBytes(); - if (tenant_quota_pool_capacity_bytes_ == 0) { - return total_registered_memory; - } - return std::min(total_registered_memory, tenant_quota_pool_capacity_bytes_); + return GetTenantQuotaCapacityBytes(); } void MasterService::RecomputeTenantEffectiveQuotas() { - if (!enable_tenant_quota_) { + if (!enable_multi_tenants_) { return; } std::lock_guard recompute_lock(tenant_quota_recompute_mutex_); - const uint64_t default_requested_quota_bytes = - default_tenant_quota_bytes_.load(std::memory_order_relaxed); const uint64_t capacity = GetTenantQuotaAllocatableCapacityBytes(); std::map active_tenants; @@ -906,20 +1114,22 @@ void MasterService::RecomputeTenantEffectiveQuotas() { for (auto it = shard.tenants.begin(); it != shard.tenants.end();) { const auto& tenant_id = it->first; auto& state = it->second; - if (!state.has_explicit_policy) { - state.requested_quota_bytes = default_requested_quota_bytes; - } if (IsLazyEmptyTenantQuotaState(state)) { it = shard.tenants.erase(it); continue; } + if (!state.has_explicit_policy) { + state.requested_quota_bytes = 0; + state.effective_quota_bytes = 0; + RefreshTenantQuotaOverQuota(state); + } active_tenants.emplace(tenant_id, state); ++it; } } - for (const auto& assignment : BuildEffectiveQuotaAssignments( - active_tenants, default_requested_quota_bytes, capacity)) { + for (const auto& assignment : + BuildEffectiveQuotaAssignments(active_tenants, capacity)) { auto& shard = tenant_quota_shards_[getTenantQuotaShardIndex( assignment.tenant_id)]; std::lock_guard lock(shard.mutex); @@ -935,7 +1145,7 @@ void MasterService::RecomputeTenantEffectiveQuotas() { tl::expected MasterService::ReserveTenantQuota( const std::string& tenant_id, uint64_t bytes) { - if (!enable_tenant_quota_ || bytes == 0) { + if (!enable_multi_tenants_) { return {}; } const auto normalized_tenant = NormalizeTenantId(tenant_id); @@ -950,68 +1160,27 @@ tl::expected MasterService::ReserveTenantQuota( { std::lock_guard lock(shard.mutex); auto it = shard.tenants.find(normalized_tenant); - if (it != shard.tenants.end()) { - auto& state = it->second; - if (exceeds_effective_quota(state, bytes)) { - return tl::make_unexpected(ErrorCode::TENANT_QUOTA_EXCEEDED); - } - - state.reserved_bytes += bytes; - RefreshTenantQuotaOverQuota(state); - return {}; - } - - auto [insert_it, _] = shard.tenants.try_emplace(normalized_tenant); - auto& state = insert_it->second; - state.requested_quota_bytes = - default_tenant_quota_bytes_.load(std::memory_order_relaxed); - state.effective_quota_bytes = 0; - state.over_quota = false; - - state.reserved_bytes = bytes; - } - - RecomputeTenantEffectiveQuotas(); - - bool rollback_needs_recompute = false; - { - std::lock_guard lock(shard.mutex); - auto it = shard.tenants.find(normalized_tenant); - if (it == shard.tenants.end()) { - return tl::make_unexpected(ErrorCode::TENANT_QUOTA_EXCEEDED); + if (it == shard.tenants.end() || !it->second.has_explicit_policy) { + return tl::make_unexpected(ErrorCode::TENANT_NOT_REGISTERED); } auto& state = it->second; - if (!exceeds_effective_quota(state, 0)) { - RefreshTenantQuotaOverQuota(state); + if (bytes == 0) { return {}; } - - if (state.reserved_bytes < bytes) { - LOG(ERROR) << "tenant quota reserve rollback mismatch tenant=" - << normalized_tenant << ", bytes=" << bytes - << ", reserved=" << state.reserved_bytes; + if (exceeds_effective_quota(state, bytes)) { return tl::make_unexpected(ErrorCode::TENANT_QUOTA_EXCEEDED); } - state.reserved_bytes -= bytes; - if (!state.has_explicit_policy && state.used_bytes == 0 && - state.reserved_bytes == 0 && state.committed_count == 0) { - shard.tenants.erase(it); - rollback_needs_recompute = true; - } else { - RefreshTenantQuotaOverQuota(state); - } - } - if (rollback_needs_recompute) { - RecomputeTenantEffectiveQuotas(); + state.reserved_bytes += bytes; + RefreshTenantQuotaOverQuota(state); + return {}; } - return tl::make_unexpected(ErrorCode::TENANT_QUOTA_EXCEEDED); } void MasterService::CommitTenantQuota(const std::string& tenant_id, uint64_t bytes) { - if (!enable_tenant_quota_ || bytes == 0) { + if (!enable_multi_tenants_ || bytes == 0) { return; } const auto normalized_tenant = NormalizeTenantId(tenant_id); @@ -1044,7 +1213,7 @@ void MasterService::CommitTenantQuota(const std::string& tenant_id, void MasterService::AbortTenantQuota(const std::string& tenant_id, uint64_t bytes) { - if (!enable_tenant_quota_ || bytes == 0) { + if (!enable_multi_tenants_ || bytes == 0) { return; } const auto normalized_tenant = NormalizeTenantId(tenant_id); @@ -1068,8 +1237,7 @@ void MasterService::AbortTenantQuota(const std::string& tenant_id, return; } state.reserved_bytes -= bytes; - if (!state.has_explicit_policy && state.used_bytes == 0 && - state.reserved_bytes == 0 && state.committed_count == 0) { + if (IsLazyEmptyTenantQuotaState(state)) { shard.tenants.erase(it); recompute_needed = true; } else { @@ -1083,7 +1251,7 @@ void MasterService::AbortTenantQuota(const std::string& tenant_id, void MasterService::ReleaseTenantQuota(const std::string& tenant_id, uint64_t bytes) { - if (!enable_tenant_quota_ || bytes == 0) { + if (!enable_multi_tenants_ || bytes == 0) { return; } const auto normalized_tenant = NormalizeTenantId(tenant_id); @@ -1110,8 +1278,7 @@ void MasterService::ReleaseTenantQuota(const std::string& tenant_id, if (state.committed_count > 0) { --state.committed_count; } - if (!state.has_explicit_policy && state.used_bytes == 0 && - state.reserved_bytes == 0 && state.committed_count == 0) { + if (IsLazyEmptyTenantQuotaState(state)) { shard.tenants.erase(it); recompute_needed = true; } else { @@ -1125,7 +1292,7 @@ void MasterService::ReleaseTenantQuota(const std::string& tenant_id, void MasterService::ReleaseTenantQuotaPartial(const std::string& tenant_id, uint64_t bytes) { - if (!enable_tenant_quota_ || bytes == 0) { + if (!enable_multi_tenants_ || bytes == 0) { return; } const auto normalized_tenant = NormalizeTenantId(tenant_id); @@ -1149,9 +1316,98 @@ void MasterService::ReleaseTenantQuotaPartial(const std::string& tenant_id, RefreshTenantQuotaOverQuota(state); } +void MasterService::CommitAdditionalTenantQuota(const std::string& tenant_id, + uint64_t bytes) { + if (!enable_multi_tenants_ || bytes == 0) { + return; + } + const auto normalized_tenant = NormalizeTenantId(tenant_id); + auto& shard = + tenant_quota_shards_[getTenantQuotaShardIndex(normalized_tenant)]; + std::lock_guard lock(shard.mutex); + auto it = shard.tenants.find(normalized_tenant); + if (it == shard.tenants.end()) { + LOG(ERROR) << "tenant quota additional commit mismatch tenant=" + << normalized_tenant << ", bytes=" << bytes + << ", reserved=0"; + return; + } + auto& state = it->second; + if (state.reserved_bytes < bytes) { + LOG(ERROR) << "tenant quota additional commit mismatch tenant=" + << normalized_tenant << ", bytes=" << bytes + << ", reserved=" << state.reserved_bytes; + return; + } + state.reserved_bytes -= bytes; + if (state.used_bytes > std::numeric_limits::max() - bytes) { + state.used_bytes = std::numeric_limits::max(); + } else { + state.used_bytes += bytes; + } + RefreshTenantQuotaOverQuota(state); +} + +void MasterService::AbortReplicationTaskQuota(const std::string& tenant_id, + const ReplicationTask& task) { + AbortTenantQuota(tenant_id, task.reserved_quota_charge_bytes); +} + +void MasterService::IncrementTenantMetadataObjectCount( + const std::string& tenant_id) { + if (!enable_multi_tenants_) { + return; + } + const auto normalized_tenant = NormalizeTenantId(tenant_id); + auto& shard = + tenant_quota_shards_[getTenantQuotaShardIndex(normalized_tenant)]; + std::lock_guard lock(shard.mutex); + auto& state = shard.tenants[normalized_tenant]; + if (state.metadata_object_count < std::numeric_limits::max()) { + ++state.metadata_object_count; + } + RefreshTenantQuotaOverQuota(state); +} + +void MasterService::DecrementTenantMetadataObjectCount( + const std::string& tenant_id) { + if (!enable_multi_tenants_) { + return; + } + const auto normalized_tenant = NormalizeTenantId(tenant_id); + auto& shard = + tenant_quota_shards_[getTenantQuotaShardIndex(normalized_tenant)]; + bool recompute_needed = false; + { + std::lock_guard lock(shard.mutex); + auto it = shard.tenants.find(normalized_tenant); + if (it == shard.tenants.end()) { + LOG(WARNING) << "tenant metadata object count decrement mismatch " + << "tenant=" << normalized_tenant; + return; + } + auto& state = it->second; + if (state.metadata_object_count > 0) { + --state.metadata_object_count; + } else { + LOG(WARNING) << "tenant metadata object count underflow tenant=" + << normalized_tenant; + } + if (IsLazyEmptyTenantQuotaState(state)) { + shard.tenants.erase(it); + recompute_needed = true; + } else { + RefreshTenantQuotaOverQuota(state); + } + } + if (recompute_needed) { + RecomputeTenantEffectiveQuotas(); + } +} + void MasterService::ReleaseCommittedQuotaCharge(ObjectMetadata& metadata, uint64_t bytes) { - if (!enable_tenant_quota_ || bytes == 0) { + if (!enable_multi_tenants_ || bytes == 0) { return; } const uint64_t release_bytes = @@ -1165,16 +1421,20 @@ void MasterService::ReleaseCommittedQuotaCharge(ObjectMetadata& metadata, } void MasterService::RebuildTenantQuotaUsageFromMetadata() { - if (!enable_tenant_quota_) { + if (!enable_multi_tenants_) { return; } + std::unordered_set metadata_tenants; std::unordered_map used_by_tenant; std::unordered_map committed_count_by_tenant; + std::unordered_map metadata_count_by_tenant; for (size_t i = 0; i < kNumShards; ++i) { MetadataShardAccessorRW shard(this, i); for (auto& [tenant_id, tenant_state] : shard->tenants) { + metadata_tenants.insert(tenant_id); for (auto& [_, metadata] : tenant_state.metadata) { + metadata_count_by_tenant[tenant_id]++; const uint64_t charge = CompletedMemoryQuotaCharge(metadata); metadata.reserved_quota_charge_bytes = 0; metadata.committed_quota_charge_bytes = charge; @@ -1195,19 +1455,27 @@ void MasterService::RebuildTenantQuotaUsageFromMetadata() { state.used_bytes = 0; state.reserved_bytes = 0; state.committed_count = 0; + state.metadata_object_count = 0; } } - for (const auto& [tenant_id, used_bytes] : used_by_tenant) { + for (const auto& tenant_id : metadata_tenants) { auto& shard = tenant_quota_shards_[getTenantQuotaShardIndex(tenant_id)]; std::lock_guard lock(shard.mutex); auto [it, inserted] = shard.tenants.try_emplace(tenant_id); auto& state = it->second; if (inserted || !state.has_explicit_policy) { - state.requested_quota_bytes = - default_tenant_quota_bytes_.load(std::memory_order_relaxed); + state.requested_quota_bytes = 0; + state.effective_quota_bytes = 0; + state.has_explicit_policy = false; + LOG(WARNING) + << "tenant " << tenant_id + << " exists in metadata but has no connector quota policy; " + "creating orphan quota state"; } - state.used_bytes = used_bytes; + state.used_bytes = used_by_tenant[tenant_id]; state.committed_count = committed_count_by_tenant[tenant_id]; + state.metadata_object_count = metadata_count_by_tenant[tenant_id]; + RefreshTenantQuotaOverQuota(state); } RecomputeTenantEffectiveQuotas(); } @@ -1407,6 +1675,7 @@ MasterService::EraseMetadata( break; } auto next = tenant_state.metadata.erase(it); + DecrementTenantMetadataObjectCount(tenant_id); if (had_completed_disk && shard) { shard->OnDiskReplicaRemoved(had_completed_disk); } @@ -1527,7 +1796,13 @@ void MasterService::ClearInvalidHandles( while (it != tenant_state.metadata.end()) { if (CleanupStaleHandles(it->second, alive_clients, &shard)) { tenant_state.processing_keys.erase(it->first); - tenant_state.replication_tasks.erase(it->first); + auto task_it = + tenant_state.replication_tasks.find(it->first); + if (task_it != tenant_state.replication_tasks.end()) { + AbortReplicationTaskQuota(tenant_it->first, + task_it->second); + tenant_state.replication_tasks.erase(task_it); + } tenant_state.offloading_tasks.erase(it->first); ErasePromotionTaskIfPresent(tenant_state, it->first, tenant_it->first); @@ -1695,7 +1970,8 @@ auto MasterService::ExistKey(const std::string& key, const std::string& tenant_id) -> tl::expected { std::shared_lock shared_lock(snapshot_mutex_); - MetadataAccessorRO accessor(this, MakeObjectIdentity(key, tenant_id)); + MetadataAccessorRO accessor(this, + MakeObjectIdentityForRequest(key, tenant_id)); if (!accessor.Exists()) { VLOG(1) << "key=" << key << ", info=object_not_found"; return false; @@ -1720,7 +1996,7 @@ auto MasterService::ExistKey(const std::string& key, std::vector> MasterService::BatchExistKey( const std::vector& keys, const std::string& tenant_id) { - const std::string normalized_tenant = NormalizeTenantId(tenant_id); + const std::string normalized_tenant = NormalizeRequestTenantId(tenant_id); std::vector> results(keys.size()); if (keys.empty()) { return results; @@ -1789,7 +2065,7 @@ std::vector> MasterService::BatchExistKey( auto MasterService::GetAllKeys(const std::string& tenant_id) -> tl::expected, ErrorCode> { std::vector all_keys; - const auto normalized_tenant = NormalizeTenantId(tenant_id); + const auto normalized_tenant = NormalizeRequestTenantId(tenant_id); for (size_t i = 0; i < kNumShards; i++) { MetadataShardAccessorRO shard(this, i); auto tenant_it = shard->tenants.find(normalized_tenant); @@ -2102,7 +2378,7 @@ auto MasterService::GetReplicaListByRegex(const std::string& regex_pattern, } std::shared_lock shared_lock(snapshot_mutex_); - const auto normalized_tenant = NormalizeTenantId(tenant_id); + const auto normalized_tenant = NormalizeRequestTenantId(tenant_id); for (size_t i = 0; i < kNumShards; ++i) { MetadataShardAccessorRO shard(this, i); auto tenant_it = shard->tenants.find(normalized_tenant); @@ -2138,7 +2414,7 @@ auto MasterService::GetReplicaList(const std::string& key, const std::string& tenant_id) -> tl::expected { std::shared_lock shared_lock(snapshot_mutex_); - const auto object_id = MakeObjectIdentity(key, tenant_id); + const auto object_id = MakeObjectIdentityForRequest(key, tenant_id); GetReplicaListResponse resp({}, default_kv_lease_ttl_); bool promotion_eligible = false; @@ -2214,7 +2490,7 @@ MasterService::BatchGetReplicaList(const std::vector& keys, return results; } - const auto normalized_tenant = NormalizeTenantId(tenant_id); + const auto normalized_tenant = NormalizeRequestTenantId(tenant_id); constexpr size_t kInvalidKeyIndex = std::numeric_limits::max(); std::array key_list_heads; key_list_heads.fill(kInvalidKeyIndex); @@ -2496,6 +2772,7 @@ auto MasterService::AllocateAndInsertMetadata( abort_reserved_quota(); return tl::make_unexpected(ErrorCode::OBJECT_ALREADY_EXISTS); } + IncrementTenantMetadataObjectCount(tenant_id); it->second.reserved_quota_charge_bytes = reserved_quota_charge; RegisterGroupMember(tenant_state, tenant_id, key, group_id); tenant_state.processing_keys.insert(key); @@ -2508,7 +2785,11 @@ auto MasterService::PutStart(const UUID& client_id, const std::string& key, const uint64_t slice_length, const ReplicateConfig& config) -> tl::expected, ErrorCode> { - const auto object_id = MakeObjectIdentity(key, tenant_id); + auto normalized_tenant_result = NormalizeTenantIdForWrite(tenant_id); + if (!normalized_tenant_result) { + return tl::make_unexpected(normalized_tenant_result.error()); + } + const ObjectIdentity object_id{normalized_tenant_result.value(), key}; if ((config.replica_num == 0 && config.nof_replica_num == 0) || key.empty() || slice_length == 0) { LOG(ERROR) << "key=" << key << ", replica_num=" << config.replica_num @@ -2558,6 +2839,17 @@ auto MasterService::PutStart(const UUID& client_id, const std::string& key, auto attempt_once = [&]() -> tl::expected, ErrorCode> { + std::unique_lock zero_charge_policy_lock( + tenant_quota_policy_mutex_, std::defer_lock); + if (ShouldProtectZeroChargeMetadataCreate(requested_quota_charge)) { + zero_charge_policy_lock.lock(); + auto latest_tenant_result = + NormalizeTenantIdForWriteLocked(tenant_id); + if (!latest_tenant_result) { + return tl::make_unexpected(latest_tenant_result.error()); + } + } + auto now = std::chrono::system_clock::now(); std::optional retry_shard_idx; { @@ -2572,7 +2864,12 @@ auto MasterService::PutStart(const UUID& client_id, const std::string& key, if (it != tenant_state.metadata.end()) { if (CleanupStaleHandles(it->second, alive_clients, &shard)) { tenant_state.processing_keys.erase(key); - tenant_state.replication_tasks.erase(key); + auto task_it = tenant_state.replication_tasks.find(key); + if (task_it != tenant_state.replication_tasks.end()) { + AbortReplicationTaskQuota(object_id.tenant_id, + task_it->second); + tenant_state.replication_tasks.erase(task_it); + } tenant_state.offloading_tasks.erase(key); ErasePromotionTaskIfPresent(tenant_state, key, object_id.tenant_id); @@ -2663,7 +2960,7 @@ auto MasterService::PutEnd(const UUID& client_id, const std::string& key, ReplicaType replica_type) -> tl::expected { std::shared_lock shared_lock(snapshot_mutex_); - const auto object_id = MakeObjectIdentity(key, tenant_id); + const auto object_id = MakeObjectIdentityForRequest(key, tenant_id); MetadataAccessorRW accessor(this, object_id); if (!accessor.Exists()) { LOG(ERROR) << "key=" << key << ", error=object_not_found"; @@ -2756,9 +3053,21 @@ auto MasterService::PutEnd(const UUID& client_id, const std::string& key, auto MasterService::AddReplica(const UUID& client_id, const std::string& key, const std::string& tenant_id, Replica& replica) - -> tl::expected { + -> tl::expected { + std::string normalized_tenant = "default"; + std::unique_lock policy_lock(tenant_quota_policy_mutex_, + std::defer_lock); + if (enable_multi_tenants_) { + policy_lock.lock(); + auto normalized_tenant_result = + NormalizeTenantIdForWriteLocked(tenant_id); + if (!normalized_tenant_result) { + return tl::make_unexpected(normalized_tenant_result.error()); + } + normalized_tenant = std::move(normalized_tenant_result.value()); + } std::shared_lock shared_lock(snapshot_mutex_); - const auto object_id = MakeObjectIdentity(key, tenant_id); + const ObjectIdentity object_id{normalized_tenant, key}; MetadataAccessorRW accessor(this, object_id); if (!accessor.Exists()) { accessor.Create( @@ -2779,7 +3088,7 @@ auto MasterService::AddReplica(const UUID& client_id, const std::string& key, metadata.AddReplicas(std::move(replicas)); auto& shard = accessor.GetShard(); shard.OnDiskReplicaAdded(metadata); - return {}; + return true; } metadata.VisitReplicas( @@ -2799,7 +3108,7 @@ auto MasterService::AddReplica(const UUID& client_id, const std::string& key, .get_local_disk_descriptor() .object_size; }); - return {}; + return false; } auto MasterService::PutRevoke(const UUID& client_id, const std::string& key, @@ -2807,7 +3116,7 @@ auto MasterService::PutRevoke(const UUID& client_id, const std::string& key, ReplicaType replica_type) -> tl::expected { std::shared_lock shared_lock(snapshot_mutex_); - const auto object_id = MakeObjectIdentity(key, tenant_id); + const auto object_id = MakeObjectIdentityForRequest(key, tenant_id); MetadataAccessorRW accessor(this, object_id); if (!accessor.Exists()) { LOG(INFO) << "key=" << key << ", info=object_not_found"; @@ -2915,7 +3224,11 @@ auto MasterService::UpsertStart(const UUID& client_id, const std::string& key, const uint64_t slice_length, const ReplicateConfig& config) -> tl::expected, ErrorCode> { - const auto object_id = MakeObjectIdentity(key, tenant_id); + auto normalized_tenant_result = NormalizeTenantIdForWrite(tenant_id); + if (!normalized_tenant_result) { + return tl::make_unexpected(normalized_tenant_result.error()); + } + const ObjectIdentity object_id{normalized_tenant_result.value(), key}; // --- Parameter validation (same as PutStart) --- if ((config.replica_num == 0 && config.nof_replica_num == 0) || key.empty() || slice_length == 0) { @@ -2966,6 +3279,17 @@ auto MasterService::UpsertStart(const UUID& client_id, const std::string& key, auto attempt_once = [&]() -> tl::expected, ErrorCode> { + std::unique_lock zero_charge_policy_lock( + tenant_quota_policy_mutex_, std::defer_lock); + if (ShouldProtectZeroChargeMetadataCreate(requested_quota_charge)) { + zero_charge_policy_lock.lock(); + auto latest_tenant_result = + NormalizeTenantIdForWriteLocked(tenant_id); + if (!latest_tenant_result) { + return tl::make_unexpected(latest_tenant_result.error()); + } + } + auto now = std::chrono::system_clock::now(); std::optional case_a_retry_shard_idx; { @@ -3284,7 +3608,7 @@ auto MasterService::EvictDiskReplica(const UUID& client_id, const std::string& tenant_id, ReplicaType replica_type) -> tl::expected { - const auto object_id = MakeObjectIdentity(key, tenant_id); + const auto object_id = MakeObjectIdentityForRequest(key, tenant_id); MetadataAccessorRW accessor(this, object_id); if (!accessor.Exists()) { LOG(INFO) << "key=" << key << ", tenant_id=" << object_id.tenant_id @@ -3344,8 +3668,12 @@ tl::expected MasterService::CopyStart( const UUID& client_id, const std::string& key, const std::string& tenant_id, const std::string& src_segment, const std::vector& tgt_segments) { + auto normalized_tenant_result = NormalizeTenantIdForWrite(tenant_id); + if (!normalized_tenant_result) { + return tl::make_unexpected(normalized_tenant_result.error()); + } + const ObjectIdentity object_id{normalized_tenant_result.value(), key}; std::shared_lock shared_lock(snapshot_mutex_); - const auto object_id = MakeObjectIdentity(key, tenant_id); { ScopedSegmentAccess segment_access = segment_manager_.getSegmentAccess(); @@ -3384,12 +3712,32 @@ tl::expected MasterService::CopyStart( return tl::make_unexpected(ErrorCode::REPLICA_NOT_FOUND); } + size_t new_replica_count = 0; + for (const auto& tgt_segment : tgt_segments) { + if (metadata.GetReplicaBySegmentName(tgt_segment) == nullptr) { + ++new_replica_count; + } + } + + const uint64_t reserved_quota_charge = + SaturatingMultiply(static_cast(metadata.size), + static_cast(new_replica_count)); + auto quota_result = + ReserveTenantQuota(object_id.tenant_id, reserved_quota_charge); + if (!quota_result) { + if (quota_result.error() == ErrorCode::TENANT_QUOTA_EXCEEDED) { + MasterMetricManager::instance().inc_tenant_quota_reject( + object_id.tenant_id, "quota_exceeded"); + } + return tl::make_unexpected(quota_result.error()); + } + auto abort_reserved_quota = [&] { + AbortTenantQuota(object_id.tenant_id, reserved_quota_charge); + }; + std::vector replicas; - replicas.reserve(tgt_segments.size()); + replicas.reserve(new_replica_count); { - // PR2 limitation: Copy can allocate extra physical MEMORY replicas - // without tenant quota admission. It does not change the logical - // object set; full quota-aware Copy admission is deferred. ScopedAllocatorAccess allocator_access = segment_manager_.getAllocatorAccess(); const auto& allocator_manager = allocator_access.getAllocatorManager(); @@ -3405,6 +3753,7 @@ tl::expected MasterService::CopyStart( if (!replica.has_value()) { LOG(ERROR) << "key=" << key << ", tgt_segment=" << tgt_segment << ", failed to allocate replica"; + abort_reserved_quota(); return tl::make_unexpected(replica.error()); } replicas.push_back(std::move(*replica)); @@ -3424,11 +3773,15 @@ tl::expected MasterService::CopyStart( // Create replication task for tracking. auto& tenant_state = accessor.GetTenantState(); - tenant_state.replication_tasks.emplace( + auto task_insert = tenant_state.replication_tasks.emplace( std::piecewise_construct, std::forward_as_tuple(key), std::forward_as_tuple(client_id, std::chrono::system_clock::now(), ReplicationTask::Type::COPY, source->id(), - std::move(replica_ids))); + std::move(replica_ids), reserved_quota_charge)); + if (!task_insert.second) { + abort_reserved_quota(); + return tl::make_unexpected(ErrorCode::OBJECT_HAS_REPLICATION_TASK); + } // Increase source refcnt to protect it from eviction. source->inc_refcnt(); @@ -3444,7 +3797,8 @@ tl::expected MasterService::CopyEnd( const UUID& client_id, const std::string& key, const std::string& tenant_id) { std::shared_lock shared_lock(snapshot_mutex_); - MetadataAccessorRW accessor(this, MakeObjectIdentity(key, tenant_id)); + MetadataAccessorRW accessor(this, + MakeObjectIdentityForRequest(key, tenant_id)); if (!accessor.Exists()) { LOG(ERROR) << "key=" << key << ", error=object_not_found"; return tl::make_unexpected(ErrorCode::OBJECT_NOT_FOUND); @@ -3483,6 +3837,7 @@ tl::expected MasterService::CopyEnd( task.replica_ids.end(), replica.id()) != task.replica_ids.end(); }); + AbortReplicationTaskQuota(metadata.tenant_id, task); accessor.EraseReplicationTask(); if (!metadata.IsValid()) { // Remove the object if it does not have any replicas. @@ -3496,6 +3851,7 @@ tl::expected MasterService::CopyEnd( // Mark all replica_ids as complete bool all_complete = true; + uint64_t completed_quota_charge = 0; for (const auto& replica_id : task.replica_ids) { auto replica = metadata.GetReplicaByID(replica_id); if (replica == nullptr || replica->has_invalid_mem_handle()) { @@ -3505,10 +3861,21 @@ tl::expected MasterService::CopyEnd( all_complete = false; } else { replica->mark_complete(); + completed_quota_charge = SaturatingAdd( + completed_quota_charge, static_cast(metadata.size)); } } SyncCacheTotalAccounting(metadata); + const uint64_t commit_charge = + std::min(completed_quota_charge, task.reserved_quota_charge_bytes); + const uint64_t abort_charge = + task.reserved_quota_charge_bytes - commit_charge; + CommitAdditionalTenantQuota(metadata.tenant_id, commit_charge); + AbortTenantQuota(metadata.tenant_id, abort_charge); + metadata.committed_quota_charge_bytes = + SaturatingAdd(metadata.committed_quota_charge_bytes, commit_charge); + accessor.EraseReplicationTask(); return all_complete ? tl::expected() @@ -3519,7 +3886,8 @@ tl::expected MasterService::CopyRevoke( const UUID& client_id, const std::string& key, const std::string& tenant_id) { std::shared_lock shared_lock(snapshot_mutex_); - MetadataAccessorRW accessor(this, MakeObjectIdentity(key, tenant_id)); + MetadataAccessorRW accessor(this, + MakeObjectIdentityForRequest(key, tenant_id)); if (!accessor.Exists()) { LOG(ERROR) << "key=" << key << ", error=object_not_found"; return tl::make_unexpected(ErrorCode::OBJECT_NOT_FOUND); @@ -3562,6 +3930,7 @@ tl::expected MasterService::CopyRevoke( }); } + AbortReplicationTaskQuota(metadata.tenant_id, task); accessor.EraseReplicationTask(); if (!metadata.IsValid()) { @@ -3575,8 +3944,12 @@ tl::expected MasterService::CopyRevoke( tl::expected MasterService::MoveStart( const UUID& client_id, const std::string& key, const std::string& tenant_id, const std::string& src_segment, const std::string& tgt_segment) { + auto normalized_tenant_result = NormalizeTenantIdForWrite(tenant_id); + if (!normalized_tenant_result) { + return tl::make_unexpected(normalized_tenant_result.error()); + } + const ObjectIdentity object_id{normalized_tenant_result.value(), key}; std::shared_lock shared_lock(snapshot_mutex_); - const auto object_id = MakeObjectIdentity(key, tenant_id); if (src_segment == tgt_segment) { LOG(ERROR) << "key=" << key << ", move_tgt=" << tgt_segment << " cannot be the same as move_src=" << src_segment; @@ -3621,9 +3994,21 @@ tl::expected MasterService::MoveStart( std::vector replicas; if (metadata.GetReplicaBySegmentName(tgt_segment) == nullptr) { - // PR2 limitation: Move can allocate a replacement physical MEMORY - // replica without tenant quota admission. Logical object accounting is - // unchanged; full quota-aware Move admission is deferred. + const uint64_t reserved_quota_charge = + SaturatingMultiply(static_cast(metadata.size), 1); + auto quota_result = + ReserveTenantQuota(object_id.tenant_id, reserved_quota_charge); + if (!quota_result) { + if (quota_result.error() == ErrorCode::TENANT_QUOTA_EXCEEDED) { + MasterMetricManager::instance().inc_tenant_quota_reject( + object_id.tenant_id, "quota_exceeded"); + } + return tl::make_unexpected(quota_result.error()); + } + auto abort_reserved_quota = [&] { + AbortTenantQuota(object_id.tenant_id, reserved_quota_charge); + }; + ScopedAllocatorAccess allocator_access = segment_manager_.getAllocatorAccess(); const auto& allocator_manager = allocator_access.getAllocatorManager(); @@ -3633,11 +4018,22 @@ tl::expected MasterService::MoveStart( if (!replica.has_value()) { LOG(ERROR) << "key=" << key << ", tgt_segment=" << tgt_segment << ", failed to allocate replica"; + abort_reserved_quota(); return tl::make_unexpected(replica.error()); } replicas.push_back(std::move(*replica)); + } else { + auto quota_result = ReserveTenantQuota(object_id.tenant_id, 0); + if (!quota_result) { + return tl::make_unexpected(quota_result.error()); + } } + const uint64_t reserved_quota_charge = + replicas.empty() + ? 0 + : SaturatingMultiply(static_cast(metadata.size), 1); + MoveStartResponse response; std::vector replica_ids; @@ -3651,11 +4047,15 @@ tl::expected MasterService::MoveStart( // Create replication task for tracking. auto& tenant_state = accessor.GetTenantState(); - tenant_state.replication_tasks.emplace( + auto task_insert = tenant_state.replication_tasks.emplace( std::piecewise_construct, std::forward_as_tuple(key), std::forward_as_tuple(client_id, std::chrono::system_clock::now(), ReplicationTask::Type::MOVE, source->id(), - std::move(replica_ids))); + std::move(replica_ids), reserved_quota_charge)); + if (!task_insert.second) { + AbortTenantQuota(object_id.tenant_id, reserved_quota_charge); + return tl::make_unexpected(ErrorCode::OBJECT_HAS_REPLICATION_TASK); + } // Increase source refcnt to protect it from eviction. source->inc_refcnt(); @@ -3671,7 +4071,8 @@ tl::expected MasterService::MoveEnd( const UUID& client_id, const std::string& key, const std::string& tenant_id) { std::shared_lock shared_lock(snapshot_mutex_); - MetadataAccessorRW accessor(this, MakeObjectIdentity(key, tenant_id)); + MetadataAccessorRW accessor(this, + MakeObjectIdentityForRequest(key, tenant_id)); if (!accessor.Exists()) { LOG(ERROR) << "key=" << key << ", error=object_not_found"; return tl::make_unexpected(ErrorCode::OBJECT_NOT_FOUND); @@ -3710,6 +4111,7 @@ tl::expected MasterService::MoveEnd( task.replica_ids.end(), replica.id()) != task.replica_ids.end(); }); + AbortReplicationTaskQuota(metadata.tenant_id, task); accessor.EraseReplicationTask(); if (!metadata.IsValid()) { // Remove the object if it does not have any replicas. @@ -3731,6 +4133,7 @@ tl::expected MasterService::MoveEnd( LOG(WARNING) << "key=" << key << ", replica_id=" << replica_id << ", move target becomes invalid during data transfer"; + AbortReplicationTaskQuota(metadata.tenant_id, task); accessor.EraseReplicationTask(); return tl::make_unexpected(ErrorCode::REPLICA_IS_GONE); } @@ -3752,6 +4155,7 @@ tl::expected MasterService::MoveEnd( std::chrono::system_clock::now() + put_start_release_timeout_sec_); } + AbortReplicationTaskQuota(metadata.tenant_id, task); accessor.EraseReplicationTask(); return {}; @@ -3761,7 +4165,8 @@ tl::expected MasterService::MoveRevoke( const UUID& client_id, const std::string& key, const std::string& tenant_id) { std::shared_lock shared_lock(snapshot_mutex_); - MetadataAccessorRW accessor(this, MakeObjectIdentity(key, tenant_id)); + MetadataAccessorRW accessor(this, + MakeObjectIdentityForRequest(key, tenant_id)); if (!accessor.Exists()) { LOG(ERROR) << "key=" << key << ", error=object_not_found"; return tl::make_unexpected(ErrorCode::OBJECT_NOT_FOUND); @@ -3804,6 +4209,7 @@ tl::expected MasterService::MoveRevoke( }); } + AbortReplicationTaskQuota(metadata.tenant_id, task); accessor.EraseReplicationTask(); if (!metadata.IsValid()) { @@ -3817,7 +4223,7 @@ tl::expected MasterService::MoveRevoke( auto MasterService::Remove(const std::string& key, const std::string& tenant_id, bool force) -> tl::expected { std::shared_lock shared_lock(snapshot_mutex_); - const auto object_id = MakeObjectIdentity(key, tenant_id); + const auto object_id = MakeObjectIdentityForRequest(key, tenant_id); MetadataAccessorRW accessor(this, object_id); if (!accessor.Exists()) { VLOG(1) << "key=" << key << ", error=object_not_found"; @@ -3868,7 +4274,7 @@ auto MasterService::RemoveByRegex(const std::string& regex_pattern, } std::shared_lock shared_lock(snapshot_mutex_); - const auto normalized_tenant = NormalizeTenantId(tenant_id); + const auto normalized_tenant = NormalizeRequestTenantId(tenant_id); for (size_t i = 0; i < kNumShards; ++i) { MetadataShardAccessorRW shard(this, i); auto tenant_it = shard->tenants.find(normalized_tenant); @@ -3979,7 +4385,7 @@ long MasterService::RemoveAll(const std::string& tenant_id, bool force) { // calling std::chrono::steady_clock::now() std::shared_lock shared_lock(snapshot_mutex_); auto now = std::chrono::system_clock::now(); - const auto normalized_tenant = NormalizeTenantId(tenant_id); + const auto normalized_tenant = NormalizeRequestTenantId(tenant_id); for (size_t i = 0; i < kNumShards; i++) { MetadataShardAccessorRW shard(this, i); @@ -4021,7 +4427,7 @@ auto MasterService::BatchRemove(const std::vector& keys, const std::string& tenant_id, bool force) -> std::vector> { std::vector> results(keys.size()); - const auto normalized_tenant = NormalizeTenantId(tenant_id); + const auto normalized_tenant = NormalizeRequestTenantId(tenant_id); // Group keys by shard to reduce lock contention std::unordered_map& keys, // Clean up stale replica handles (consistent with single Remove) if (CleanupStaleHandles(it->second, alive_clients, &shard)) { tenant_state.processing_keys.erase(key); - tenant_state.replication_tasks.erase(key); + auto task_it = tenant_state.replication_tasks.find(key); + if (task_it != tenant_state.replication_tasks.end()) { + AbortReplicationTaskQuota(normalized_tenant, + task_it->second); + tenant_state.replication_tasks.erase(task_it); + } tenant_state.offloading_tasks.erase(key); ErasePromotionTaskIfPresent(tenant_state, key, normalized_tenant); @@ -4362,16 +4773,31 @@ auto MasterService::NotifyOffloadSuccess( for (size_t i = 0; i < tasks.size(); ++i) { const auto& task = tasks[i]; const auto& metadata = metadatas[i]; - const auto object_id = MakeObjectIdentity(task.key, task.tenant_id); + const auto request_object_id = + MakeObjectIdentityForRequest(task.key, task.tenant_id); - // Release refcnt and clear offloading task. + Replica replica(client_id, metadata.data_size, + metadata.transport_endpoint, ReplicaStatus::COMPLETE); + bool handled_existing_object = false; + bool added_new_local_disk_replica = false; { - MetadataAccessorRW accessor(this, object_id); + std::shared_lock shared_lock(snapshot_mutex_); + MetadataAccessorRW accessor(this, request_object_id); if (accessor.Exists()) { auto& obj_metadata = accessor.Get(); auto& tenant_state = accessor.GetTenantState(); - auto task_it = - tenant_state.offloading_tasks.find(object_id.user_key); + auto task_it = tenant_state.offloading_tasks.find( + request_object_id.user_key); + if (task_it != tenant_state.offloading_tasks.end() && + replica.type() != ReplicaType::LOCAL_DISK) { + LOG(ERROR) << "Invalid replica type: " << replica.type() + << ". Expected ReplicaType::LOCAL_DISK."; + return tl::make_unexpected(ErrorCode::INVALID_PARAMS); + } + + // Existing orphan objects can only bypass tenant registration + // for a master-admitted offload completion. Without this task + // marker, fall through to the regular registration check. if (task_it != tenant_state.offloading_tasks.end()) { auto source = obj_metadata.GetReplicaByID(task_it->second.source_id); @@ -4379,26 +4805,68 @@ auto MasterService::NotifyOffloadSuccess( source->dec_refcnt(); } tenant_state.offloading_tasks.erase(task_it); + + if (!obj_metadata.HasReplica( + &Replica::fn_is_local_disk_replica)) { + std::vector replicas; + replicas.emplace_back(std::move(replica)); + obj_metadata.AddReplicas(std::move(replicas)); + auto& shard = accessor.GetShard(); + shard.OnDiskReplicaAdded(obj_metadata); + added_new_local_disk_replica = true; + } else { + obj_metadata.VisitReplicas( + [client_id](const Replica& rep) { + return rep.type() == ReplicaType::LOCAL_DISK && + rep.get_descriptor() + .get_local_disk_descriptor() + .client_id == client_id; + }, + [&replica](Replica& rep) { + rep.get_descriptor() + .get_local_disk_descriptor() + .transport_endpoint = + replica.get_descriptor() + .get_local_disk_descriptor() + .transport_endpoint; + rep.get_descriptor() + .get_local_disk_descriptor() + .object_size = + replica.get_descriptor() + .get_local_disk_descriptor() + .object_size; + }); + } + handled_existing_object = true; } } } - // Add LOCAL_DISK replica. - Replica replica(client_id, metadata.data_size, - metadata.transport_endpoint, ReplicaStatus::COMPLETE); - auto res = AddReplica(client_id, object_id.user_key, - object_id.tenant_id, replica); - if (!res) { - if (res.error() == ErrorCode::OBJECT_NOT_FOUND) { - continue; + if (!handled_existing_object) { + auto normalized_tenant_result = + NormalizeTenantIdForWrite(task.tenant_id); + if (!normalized_tenant_result) { + return tl::make_unexpected(normalized_tenant_result.error()); } - LOG(ERROR) << "Failed to add replica: error=" << res.error() - << ", client_id=" << client_id - << ", tenant_id=" << object_id.tenant_id - << ", key=" << object_id.user_key; - return tl::make_unexpected(res.error()); + const ObjectIdentity object_id{normalized_tenant_result.value(), + task.key}; + + auto res = AddReplica(client_id, object_id.user_key, + object_id.tenant_id, replica); + if (!res) { + if (res.error() == ErrorCode::OBJECT_NOT_FOUND) { + continue; + } + LOG(ERROR) << "Failed to add replica: error=" << res.error() + << ", client_id=" << client_id + << ", tenant_id=" << object_id.tenant_id + << ", key=" << object_id.user_key; + return tl::make_unexpected(res.error()); + } + added_new_local_disk_replica = res.value(); } - if (local_disk_segment && metadata.data_size > 0) { + if (local_disk_segment && metadata.data_size > 0 && + added_new_local_disk_replica) { local_disk_segment->ssd_used_bytes.fetch_add( metadata.data_size, std::memory_order_relaxed); } @@ -4630,8 +5098,12 @@ auto MasterService::PromotionAllocStart( const UUID& client_id, const std::string& key, const std::string& tenant_id, uint64_t size, const std::vector& preferred_segments) -> tl::expected { + auto normalized_tenant_result = NormalizeTenantIdForWrite(tenant_id); + if (!normalized_tenant_result) { + return tl::make_unexpected(normalized_tenant_result.error()); + } + const ObjectIdentity object_id{normalized_tenant_result.value(), key}; std::shared_lock shared_lock(snapshot_mutex_); - const auto object_id = MakeObjectIdentity(key, tenant_id); MetadataAccessorRW accessor(this, object_id); if (!accessor.Exists()) { return tl::make_unexpected(ErrorCode::OBJECT_NOT_FOUND); @@ -4745,7 +5217,7 @@ auto MasterService::NotifyPromotionSuccess(const UUID& client_id, const std::string& tenant_id) -> tl::expected { std::shared_lock shared_lock(snapshot_mutex_); - const auto object_id = MakeObjectIdentity(key, tenant_id); + const auto object_id = MakeObjectIdentityForRequest(key, tenant_id); MetadataAccessorRW accessor(this, object_id); if (!accessor.Exists()) { return tl::make_unexpected(ErrorCode::OBJECT_NOT_FOUND); @@ -4840,7 +5312,7 @@ auto MasterService::NotifyPromotionFailure(const UUID& client_id, const std::string& tenant_id) -> tl::expected { std::shared_lock shared_lock(snapshot_mutex_); - const auto object_id = MakeObjectIdentity(key, tenant_id); + const auto object_id = MakeObjectIdentityForRequest(key, tenant_id); MetadataAccessorRW accessor(this, object_id); if (!accessor.Exists()) { return tl::make_unexpected(ErrorCode::OBJECT_NOT_FOUND); @@ -5017,6 +5489,7 @@ void MasterService::DiscardExpiredProcessingReplicas( if (metadata_it == tenant_state.metadata.end()) { LOG(ERROR) << "Key " << task_it->first << " was removed with ongoing replication task"; + AbortReplicationTaskQuota(tenant_it->first, task_it->second); task_it = tenant_state.replication_tasks.erase(task_it); continue; } @@ -5048,6 +5521,7 @@ void MasterService::DiscardExpiredProcessingReplicas( EraseMetadata(tenant_state, metadata_it, tenant_it->first, QuotaEraseMode::kFull, &shard); } + AbortReplicationTaskQuota(tenant_it->first, task_it->second); task_it = tenant_state.replication_tasks.erase(task_it); } @@ -5519,7 +5993,6 @@ tl::expected MasterService::PersistState( MetadataSerializer metadata_serializer(this); SegmentSerializer segment_serializer(&segment_manager_); TaskManagerSerializer task_manager_serializer(&task_manager_); - TenantQuotaPolicySerializer tenant_quota_policy_serializer(this); auto metadata_result = metadata_serializer.Serialize(); if (!metadata_result) { @@ -5561,27 +6034,6 @@ tl::expected MasterService::PersistState( "[Snapshot] task manager serialization_successful, snapshot_id={}", snapshot_id); - std::vector serialized_tenant_quota_policy; - if (enable_tenant_quota_) { - auto tenant_quota_policy_result = - tenant_quota_policy_serializer.Serialize(); - if (!tenant_quota_policy_result) { - SNAP_LOG_ERROR( - "[Snapshot] tenant quota policy serialization failed, " - "snapshot_id={}, code={}, msg={}", - snapshot_id, - toString(tenant_quota_policy_result.error().code), - tenant_quota_policy_result.error().message); - return tl::make_unexpected(tenant_quota_policy_result.error()); - } - serialized_tenant_quota_policy = - std::move(tenant_quota_policy_result.value()); - SNAP_LOG_INFO( - "[Snapshot] tenant quota policy serialization_successful, " - "snapshot_id={}", - snapshot_id); - } - const auto& serialized_metadata = metadata_result.value(); const auto& serialized_segment = segment_result.value(); const auto& serialized_task_manager = task_manager_result.value(); @@ -5650,27 +6102,6 @@ tl::expected MasterService::PersistState( upload_success = false; } - if (enable_tenant_quota_) { - std::string tenant_quota_policy_path = - path_prefix + SNAPSHOT_TENANT_QUOTA_POLICY_FILE; - upload_result = UploadSnapshotPayloadFile( - serialized_tenant_quota_policy, tenant_quota_policy_path, - SNAPSHOT_TENANT_QUOTA_POLICY_FILE, snapshot_id); - if (!upload_result) { - SNAP_LOG_ERROR( - "[Snapshot] tenant_quota_policy upload failed, " - "snapshot_id={}, path={}, code={}, msg={}", - snapshot_id, tenant_quota_policy_path, - toString(upload_result.error().code), - upload_result.error().message); - if (!use_snapshot_backup_dir_) { - return tl::make_unexpected(upload_result.error()); - } - error_msg.append(upload_result.error().message + "\n"); - upload_success = false; - } - } - // Upload manifest std::string manifest_content = fmt::format("{}|{}|{}", SNAPSHOT_SERIALIZER_TYPE, @@ -6055,50 +6486,9 @@ bool MasterService::TryRestoreStateFromSnapshot( } LOG(INFO) << "[Restore] Download task manager file success"; - bool has_tenant_quota_policy = false; - std::vector tenant_quota_policy_content; - if (enable_tenant_quota_) { - std::string tenant_quota_policy_path = - path_prefix + SNAPSHOT_TENANT_QUOTA_POLICY_FILE; - download_result = snapshot_object_store_->DownloadBuffer( - tenant_quota_policy_path, tenant_quota_policy_content); - if (!download_result) { - if (snapshot_object_store_->IsNotFoundError( - download_result.error())) { - LOG(INFO) - << "[Restore] Tenant quota policy file is missing in " - "snapshot " - << state_id << ", treating as legacy snapshot"; - } else { - return fail_restore( - "failed to download tenant_quota_policy '" + - tenant_quota_policy_path + - "': " + download_result.error()); - } - } else { - has_tenant_quota_policy = true; - if (use_snapshot_backup_dir_) { - auto save_result = FileUtil::SaveBinaryToFile( - tenant_quota_policy_content, - fs::path(snapshot_backup_dir_) / - SNAPSHOT_BACKUP_RESTORE_DIR / - SNAPSHOT_TENANT_QUOTA_POLICY_FILE); - if (!save_result) { - LOG(ERROR) - << "[Restore] Failed to save tenant quota policy " - "to file: " - << save_result.error(); - } - } - LOG(INFO) - << "[Restore] Download tenant quota policy file success"; - } - } - SegmentSerializer segment_serializer(&segment_manager_); MetadataSerializer metadata_serializer(this); TaskManagerSerializer task_manager_serializer(&task_manager_); - TenantQuotaPolicySerializer tenant_quota_policy_serializer(this); auto segments_result = segment_serializer.Deserialize(segments_content); if (!segments_result) { @@ -6119,19 +6509,6 @@ bool MasterService::TryRestoreStateFromSnapshot( } LOG(INFO) << "[Restore] Deserialize metadata success"; - if (has_tenant_quota_policy) { - auto tenant_quota_policy_result = - tenant_quota_policy_serializer.Deserialize( - tenant_quota_policy_content); - if (!tenant_quota_policy_result) { - return fail_restore(fmt::format( - "failed to deserialize tenant quota policy: {} - {}", - static_cast(tenant_quota_policy_result.error().code), - tenant_quota_policy_result.error().message)); - } - LOG(INFO) << "[Restore] Deserialize tenant quota policy success"; - } - auto task_manager_result = task_manager_serializer.Deserialize(task_manager_content); if (!task_manager_result) { @@ -6261,7 +6638,6 @@ bool MasterService::TryRestoreStateFromSnapshot( LOG(INFO) << "[Restore] Successfully restored state from snapshot: " << state_id; - RebuildTenantQuotaUsageFromMetadata(); return true; } catch (const std::exception& e) { return fail_restore("exception during state restoration: " + @@ -6275,10 +6651,8 @@ void MasterService::ResetStateAfterFailedRestoreAttempt() { SegmentSerializer segment_serializer(&segment_manager_); MetadataSerializer metadata_serializer(this); TaskManagerSerializer task_manager_serializer(&task_manager_); - TenantQuotaPolicySerializer tenant_quota_policy_serializer(this); task_manager_serializer.Reset(); - tenant_quota_policy_serializer.Reset(); metadata_serializer.Reset(); segment_serializer.Reset(); @@ -6303,7 +6677,7 @@ MasterService::TenantQuotaEvictionResult MasterService::EvictTenantMemoryForQuota(const std::string& tenant_id, uint64_t target_bytes) { TenantQuotaEvictionResult total; - if (!enable_tenant_quota_ || target_bytes == 0) { + if (!enable_multi_tenants_ || target_bytes == 0) { return total; } @@ -6454,9 +6828,11 @@ MasterService::EvictTenantMemoryForQuota(const std::string& tenant_id, }; auto pass = [&](bool allow_soft_pinned) { - for (size_t shard_idx = 0; - shard_idx < kNumShards && total.freed_bytes < target_bytes; - ++shard_idx) { + const size_t start_shard = RandomIndex(kNumShards); + for (size_t scanned = 0; + scanned < kNumShards && total.freed_bytes < target_bytes; + ++scanned) { + const size_t shard_idx = (start_shard + scanned) % kNumShards; std::vector> deferred_replicas; { MetadataShardAccessorRW shard(this, shard_idx); @@ -7523,150 +7899,6 @@ void MasterService::NofHeartbeatThreadFunc() { } } -tl::expected, SerializationError> -MasterService::TenantQuotaPolicySerializer::Serialize() { - if (!service_) { - return tl::make_unexpected( - SerializationError(ErrorCode::SERIALIZE_FAIL, - "serialize TenantQuotaPolicy service_ is null")); - } - - std::map explicit_policies; - for (size_t i = 0; i < kNumTenantQuotaShards; ++i) { - const auto& shard = service_->tenant_quota_shards_[i]; - std::lock_guard lock(shard.mutex); - for (const auto& [tenant_id, state] : shard.tenants) { - if (state.has_explicit_policy) { - explicit_policies[tenant_id] = state.requested_quota_bytes; - } - } - } - - msgpack::sbuffer sbuf; - msgpack::packer packer(&sbuf); - packer.pack_array(3); - packer.pack(static_cast(1)); - packer.pack( - service_->default_tenant_quota_bytes_.load(std::memory_order_relaxed)); - packer.pack_array(explicit_policies.size()); - for (const auto& [tenant_id, requested_quota_bytes] : explicit_policies) { - packer.pack_array(2); - packer.pack(tenant_id); - packer.pack(requested_quota_bytes); - } - - return std::vector( - reinterpret_cast(sbuf.data()), - reinterpret_cast(sbuf.data()) + sbuf.size()); -} - -tl::expected -MasterService::TenantQuotaPolicySerializer::Deserialize( - const std::vector& data) { - if (!service_) { - return tl::make_unexpected(SerializationError( - ErrorCode::DESERIALIZE_FAIL, - "deserialize TenantQuotaPolicy service_ is null")); - } - - msgpack::object_handle oh; - try { - oh = msgpack::unpack(reinterpret_cast(data.data()), - data.size()); - } catch (const std::exception& e) { - return tl::make_unexpected(SerializationError( - ErrorCode::DESERIALIZE_FAIL, - "failed to unpack tenant quota policy msgpack: " + - std::string(e.what()))); - } - - const msgpack::object& obj = oh.get(); - if (obj.type != msgpack::type::ARRAY || obj.via.array.size != 3) { - return tl::make_unexpected( - SerializationError(ErrorCode::DESERIALIZE_FAIL, - "invalid tenant quota policy root format")); - } - - const auto* fields = obj.via.array.ptr; - uint32_t version = 0; - uint64_t default_requested_quota_bytes = 0; - try { - version = fields[0].as(); - default_requested_quota_bytes = fields[1].as(); - } catch (const std::exception& e) { - return tl::make_unexpected(SerializationError( - ErrorCode::DESERIALIZE_FAIL, - "invalid tenant quota policy header: " + std::string(e.what()))); - } - if (version != 1) { - return tl::make_unexpected( - SerializationError(ErrorCode::DESERIALIZE_FAIL, - "unsupported tenant quota policy version: " + - std::to_string(version))); - } - - const msgpack::object& policies = fields[2]; - if (policies.type != msgpack::type::ARRAY) { - return tl::make_unexpected( - SerializationError(ErrorCode::DESERIALIZE_FAIL, - "invalid tenant quota policy list format")); - } - - std::map explicit_policies; - for (uint32_t i = 0; i < policies.via.array.size; ++i) { - const auto& policy = policies.via.array.ptr[i]; - if (policy.type != msgpack::type::ARRAY || policy.via.array.size != 2) { - return tl::make_unexpected( - SerializationError(ErrorCode::DESERIALIZE_FAIL, - "invalid tenant quota policy entry format")); - } - try { - std::string tenant_id = - NormalizeTenantId(policy.via.array.ptr[0].as()); - uint64_t requested_quota_bytes = - policy.via.array.ptr[1].as(); - if (requested_quota_bytes == 0) { - return tl::make_unexpected(SerializationError( - ErrorCode::DESERIALIZE_FAIL, - "explicit tenant quota policy must be positive")); - } - explicit_policies[std::move(tenant_id)] = requested_quota_bytes; - } catch (const std::exception& e) { - return tl::make_unexpected(SerializationError( - ErrorCode::DESERIALIZE_FAIL, - "invalid tenant quota policy entry: " + std::string(e.what()))); - } - } - - Reset(); - service_->default_tenant_quota_bytes_.store(default_requested_quota_bytes, - std::memory_order_relaxed); - for (const auto& [tenant_id, requested_quota_bytes] : explicit_policies) { - auto& shard = - service_->tenant_quota_shards_[service_->getTenantQuotaShardIndex( - tenant_id)]; - std::lock_guard lock(shard.mutex); - auto& state = shard.tenants[tenant_id]; - state.requested_quota_bytes = requested_quota_bytes; - state.has_explicit_policy = true; - } - return {}; -} - -void MasterService::TenantQuotaPolicySerializer::Reset() { - if (!service_) { - return; - } - service_->default_tenant_quota_bytes_.store( - service_->configured_default_tenant_quota_bytes_, - std::memory_order_relaxed); - for (size_t i = 0; i < kNumTenantQuotaShards; ++i) { - auto& shard = service_->tenant_quota_shards_[i]; - std::lock_guard lock(shard.mutex); - shard.tenants.clear(); - } -} - tl::expected, SerializationError> MasterService::MetadataSerializer::Serialize() { msgpack::sbuffer sbuf; @@ -8260,8 +8492,12 @@ std::string MasterService::FormatTimestamp( tl::expected MasterService::CreateCopyTask( const std::string& key, const std::string& tenant_id, const std::vector& targets) { + auto normalized_tenant_result = NormalizeTenantIdForWrite(tenant_id); + if (!normalized_tenant_result) { + return tl::make_unexpected(normalized_tenant_result.error()); + } + const ObjectIdentity object_id{normalized_tenant_result.value(), key}; std::shared_lock shared_lock(snapshot_mutex_); - const auto object_id = MakeObjectIdentity(key, tenant_id); if (targets.empty()) { LOG(ERROR) << "key=" << key << ", error=empty_targets"; return tl::make_unexpected(ErrorCode::INVALID_PARAMS); @@ -8318,8 +8554,12 @@ tl::expected MasterService::CreateCopyTask( tl::expected MasterService::CreateMoveTask( const std::string& key, const std::string& tenant_id, const std::string& source, const std::string& target) { + auto normalized_tenant_result = NormalizeTenantIdForWrite(tenant_id); + if (!normalized_tenant_result) { + return tl::make_unexpected(normalized_tenant_result.error()); + } + const ObjectIdentity object_id{normalized_tenant_result.value(), key}; std::shared_lock shared_lock(snapshot_mutex_); - const auto object_id = MakeObjectIdentity(key, tenant_id); MetadataAccessorRO accessor(this, object_id); if (!accessor.Exists()) { VLOG(1) << "key=" << key << ", info=object_not_found"; diff --git a/mooncake-store/src/rpc_service.cpp b/mooncake-store/src/rpc_service.cpp index a85b93490d..493a496c23 100644 --- a/mooncake-store/src/rpc_service.cpp +++ b/mooncake-store/src/rpc_service.cpp @@ -1100,30 +1100,9 @@ WrappedMasterService::DeleteTenantQuotaPolicy(const std::string& tenant_id) { if (!master_service_.IsTenantQuotaEnabled()) { return tl::make_unexpected(ErrorCode::UNAVAILABLE_IN_CURRENT_MODE); } - auto before = master_service_.GetTenantQuotaSnapshot(tenant_id); - if (!before.has_value()) { - return tl::make_unexpected(ErrorCode::OBJECT_NOT_FOUND); - } return master_service_.DeleteTenantQuotaPolicy(tenant_id); } -tl::expected -WrappedMasterService::GetDefaultTenantQuotaPolicy() { - if (!master_service_.IsTenantQuotaEnabled()) { - return tl::make_unexpected(ErrorCode::UNAVAILABLE_IN_CURRENT_MODE); - } - return master_service_.GetDefaultTenantQuotaPolicy(); -} - -tl::expected WrappedMasterService::SetDefaultTenantQuotaPolicy( - uint64_t requested_quota_bytes) { - if (!master_service_.IsTenantQuotaEnabled()) { - return tl::make_unexpected(ErrorCode::UNAVAILABLE_IN_CURRENT_MODE); - } - master_service_.SetDefaultTenantQuotaPolicy(requested_quota_bytes); - return {}; -} - tl::expected WrappedMasterService::GetTenantQuotaAllocatableCapacityBytes() { if (!master_service_.IsTenantQuotaEnabled()) { diff --git a/mooncake-store/src/tenant_quota.cpp b/mooncake-store/src/tenant_quota.cpp index 732fa4e7f6..da567c48ea 100644 --- a/mooncake-store/src/tenant_quota.cpp +++ b/mooncake-store/src/tenant_quota.cpp @@ -25,7 +25,8 @@ uint64_t SaturatingAdd(uint64_t lhs, uint64_t rhs) { bool IsLazyEmptyTenant(const TenantQuotaState& state) { return !state.has_explicit_policy && state.used_bytes == 0 && - state.reserved_bytes == 0 && state.committed_count == 0; + state.reserved_bytes == 0 && state.committed_count == 0 && + state.metadata_object_count == 0; } TenantQuotaResult AccountingMismatch(const char* operation, @@ -40,18 +41,14 @@ TenantQuotaResult AccountingMismatch(const char* operation, std::vector BuildEffectiveQuotaAssignments( const std::map& tenants, - uint64_t default_requested_quota_bytes, uint64_t allocatable_capacity_bytes) { unsigned __int128 explicit_requested_sum = 0; std::vector explicit_tenants; - std::vector default_tenants; for (const auto& [tenant_id, state] : tenants) { if (state.has_explicit_policy) { explicit_tenants.push_back(tenant_id); explicit_requested_sum += state.requested_quota_bytes; - } else if (!IsLazyEmptyTenant(state)) { - default_tenants.push_back(tenant_id); } } @@ -104,11 +101,6 @@ std::vector BuildEffectiveQuotaAssignments( for (const auto& tenant_id : explicit_tenants) { assigned[tenant_id] = tenants.at(tenant_id).requested_quota_bytes; } - const uint64_t remaining_capacity = - allocatable_capacity_bytes - - static_cast(explicit_requested_sum); - distribute(default_tenants, remaining_capacity, default_tenants.size(), - /*proportional_to_requested=*/false); } else { distribute(explicit_tenants, allocatable_capacity_bytes, explicit_requested_sum, @@ -124,19 +116,6 @@ std::vector BuildEffectiveQuotaAssignments( return result; } -void TenantQuotaTable::SetDefaultRequestedQuota(uint64_t bytes) { - default_requested_quota_bytes_ = bytes; - for (auto& [_, state] : tenants_) { - if (!state.has_explicit_policy) { - state.requested_quota_bytes = default_requested_quota_bytes_; - } - } -} - -uint64_t TenantQuotaTable::GetDefaultRequestedQuota() const { - return default_requested_quota_bytes_; -} - TenantQuotaResult TenantQuotaTable::UpsertTenantPolicy( std::string tenant_id, uint64_t requested_quota_bytes) { if (requested_quota_bytes == 0) { @@ -157,22 +136,19 @@ void TenantQuotaTable::EraseTenantPolicy(std::string tenant_id) { return; } auto& state = it->second; - state.requested_quota_bytes = default_requested_quota_bytes_; + state.requested_quota_bytes = 0; + state.effective_quota_bytes = 0; state.has_explicit_policy = false; } void TenantQuotaTable::RecomputeEffectiveQuotas( uint64_t allocatable_capacity_bytes) { for (auto& [tenant_id, state] : tenants_) { - if (!state.has_explicit_policy) { - state.requested_quota_bytes = default_requested_quota_bytes_; - } state.effective_quota_bytes = 0; } - for (const auto& assignment : BuildEffectiveQuotaAssignments( - tenants_, default_requested_quota_bytes_, - allocatable_capacity_bytes)) { + for (const auto& assignment : + BuildEffectiveQuotaAssignments(tenants_, allocatable_capacity_bytes)) { tenants_.at(assignment.tenant_id).effective_quota_bytes = assignment.effective_quota_bytes; } @@ -216,6 +192,9 @@ TenantQuotaResult TenantQuotaTable::Reserve(std::string tenant_id, return tl::make_unexpected(TenantQuotaError::kQuotaExceeded); } auto& state = it->second; + if (!state.has_explicit_policy) { + return tl::make_unexpected(TenantQuotaError::kQuotaExceeded); + } if (static_cast(state.used_bytes) + state.reserved_bytes + bytes > @@ -313,7 +292,8 @@ TenantQuotaState& TenantQuotaTable::GetOrCreateState( const std::string& tenant_id) { auto [it, inserted] = tenants_.try_emplace(tenant_id); if (inserted) { - it->second.requested_quota_bytes = default_requested_quota_bytes_; + it->second.requested_quota_bytes = 0; + it->second.effective_quota_bytes = 0; } return it->second; } @@ -327,15 +307,18 @@ TenantQuotaSnapshot TenantQuotaTable::MakeSnapshot( .used_bytes = state.used_bytes, .reserved_bytes = state.reserved_bytes, .committed_count = state.committed_count, + .metadata_object_count = state.metadata_object_count, .has_explicit_policy = state.has_explicit_policy, .over_quota = state.over_quota, }; } void TenantQuotaTable::RefreshOverQuota(TenantQuotaState* state) const { - state->over_quota = static_cast(state->used_bytes) + - state->reserved_bytes > - state->effective_quota_bytes; + state->over_quota = + (!state->has_explicit_policy && state->metadata_object_count > 0) || + static_cast(state->used_bytes) + + state->reserved_bytes > + state->effective_quota_bytes; } } // namespace mooncake diff --git a/mooncake-store/src/tenant_quota_policy_store.cpp b/mooncake-store/src/tenant_quota_policy_store.cpp new file mode 100644 index 0000000000..23e10b7099 --- /dev/null +++ b/mooncake-store/src/tenant_quota_policy_store.cpp @@ -0,0 +1,361 @@ +#include "tenant_quota_policy_store.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include "types.h" + +namespace mooncake { +namespace { + +bool IsValidTenantQuotaName(const std::string& name) { + return IsValidTenantId(name); +} + +std::string ErrnoMessage(const std::string& action, const std::string& path) { + return action + " '" + path + "' failed: " + std::strerror(errno); +} + +tl::expected WriteAll(int fd, const std::string& content, + const std::string& path) { + const char* data = content.data(); + size_t remaining = content.size(); + while (remaining > 0) { + ssize_t written = ::write(fd, data, remaining); + if (written < 0) { + if (errno == EINTR) { + continue; + } + return tl::make_unexpected(ErrnoMessage("write", path)); + } + if (written == 0) { + return tl::make_unexpected("write '" + path + "' made no progress"); + } + data += written; + remaining -= static_cast(written); + } + return {}; +} + +tl::expected FsyncDirectory(const std::string& path) { + std::filesystem::path dir = std::filesystem::path(path).parent_path(); + if (dir.empty()) { + dir = "."; + } + int fd = ::open(dir.string().c_str(), O_RDONLY | O_DIRECTORY); + if (fd < 0) { + return tl::make_unexpected( + ErrnoMessage("open directory", dir.string())); + } + auto close_fd = [&] { ::close(fd); }; + if (::fsync(fd) != 0) { + std::string error = ErrnoMessage("fsync directory", dir.string()); + close_fd(); + return tl::make_unexpected(error); + } + close_fd(); + return {}; +} + +std::string MakeTempPath(const std::string& path) { + const auto now = + std::chrono::steady_clock::now().time_since_epoch().count(); + std::ostringstream oss; + oss << path << ".tmp." << ::getpid() << "." + << std::hash{}(std::this_thread::get_id()) << "." + << now; + return oss.str(); +} + +std::string QuoteYamlDoubleQuotedScalar(const std::string& value) { + std::ostringstream out; + out << '"'; + for (unsigned char c : value) { + switch (c) { + case '\\': + out << "\\\\"; + break; + case '"': + out << "\\\""; + break; + case '\0': + out << "\\0"; + break; + case '\a': + out << "\\a"; + break; + case '\b': + out << "\\b"; + break; + case '\t': + out << "\\t"; + break; + case '\n': + out << "\\n"; + break; + case '\v': + out << "\\v"; + break; + case '\f': + out << "\\f"; + break; + case '\r': + out << "\\r"; + break; + default: + if (c < 0x20 || c == 0x7f) { + out << "\\x" << std::uppercase << std::hex << std::setw(2) + << std::setfill('0') << static_cast(c) << std::dec + << std::nouppercase << std::setfill(' '); + } else { + out << static_cast(c); + } + break; + } + } + out << '"'; + return out.str(); +} + +} // namespace + +tl::expected ParseTenantQuotaBytes( + const std::string& value) { + if (value.empty()) { + return tl::make_unexpected("quota must not be empty"); + } + + size_t digits = 0; + while (digits < value.size() && value[digits] >= '0' && + value[digits] <= '9') { + ++digits; + } + if (digits == 0) { + return tl::make_unexpected("quota must start with an integer"); + } + + uint64_t number = 0; + for (size_t i = 0; i < digits; ++i) { + const uint64_t digit = static_cast(value[i] - '0'); + if (number > (std::numeric_limits::max() - digit) / 10) { + return tl::make_unexpected("quota integer overflows uint64"); + } + number = number * 10 + digit; + } + if (number == 0) { + return tl::make_unexpected("quota must be positive"); + } + + const std::string unit = value.substr(digits); + uint64_t multiplier = 1; + if (unit.empty() || unit == "B") { + multiplier = 1; + } else if (unit == "KB") { + multiplier = 1024ULL; + } else if (unit == "MB") { + multiplier = 1024ULL * 1024ULL; + } else if (unit == "GB") { + multiplier = 1024ULL * 1024ULL * 1024ULL; + } else if (unit == "TB") { + multiplier = 1024ULL * 1024ULL * 1024ULL * 1024ULL; + } else { + return tl::make_unexpected("unsupported quota unit '" + unit + "'"); + } + + if (number > std::numeric_limits::max() / multiplier) { + return tl::make_unexpected("quota byte value overflows uint64"); + } + return number * multiplier; +} + +tl::expected ParseTenantQuotaPolicyYaml( + const std::string& yaml) { + YAML::Node root; + try { + root = YAML::Load(yaml); + } catch (const YAML::Exception& e) { + return tl::make_unexpected(std::string("invalid YAML: ") + e.what()); + } + + if (!root || !root.IsMap()) { + return tl::make_unexpected("tenant quota policy must be a YAML map"); + } + const auto version_node = root["version"]; + if (!version_node || !version_node.IsScalar()) { + return tl::make_unexpected("tenant quota policy version is required"); + } + int version = 0; + try { + version = version_node.as(); + } catch (const YAML::Exception& e) { + return tl::make_unexpected(std::string("invalid version: ") + e.what()); + } + if (version != 1) { + return tl::make_unexpected("unsupported tenant quota policy version: " + + std::to_string(version)); + } + + const auto tenants_node = root["tenants"]; + if (!tenants_node || !tenants_node.IsSequence()) { + return tl::make_unexpected("tenants must be a YAML sequence"); + } + + TenantQuotaPolicySnapshot snapshot; + for (size_t i = 0; i < tenants_node.size(); ++i) { + const auto entry = tenants_node[i]; + if (!entry || !entry.IsMap()) { + return tl::make_unexpected("tenant entry must be a YAML map"); + } + const auto name_node = entry["name"]; + const auto quota_node = entry["quota"]; + if (!name_node || !name_node.IsScalar()) { + return tl::make_unexpected("tenant name is required"); + } + if (!quota_node || !quota_node.IsScalar()) { + return tl::make_unexpected("tenant quota is required"); + } + + std::string name; + std::string quota; + try { + name = name_node.as(); + quota = quota_node.as(); + } catch (const YAML::Exception& e) { + return tl::make_unexpected(std::string("invalid tenant entry: ") + + e.what()); + } + if (!IsValidTenantQuotaName(name)) { + return tl::make_unexpected("invalid tenant name '" + name + "'"); + } + name = NormalizeTenantId(name); + if (!IsValidTenantQuotaName(name)) { + return tl::make_unexpected("invalid tenant name '" + name + "'"); + } + if (snapshot.tenant_quotas.contains(name)) { + return tl::make_unexpected("duplicate tenant name '" + name + "'"); + } + + auto quota_bytes = ParseTenantQuotaBytes(quota); + if (!quota_bytes) { + return tl::make_unexpected("invalid quota for tenant '" + name + + "': " + quota_bytes.error()); + } + snapshot.tenant_quotas.emplace(std::move(name), quota_bytes.value()); + } + + return snapshot; +} + +std::string FormatTenantQuotaPolicyYaml( + const TenantQuotaPolicySnapshot& snapshot) { + std::ostringstream out; + out << "version: 1\n\n"; + if (snapshot.tenant_quotas.empty()) { + out << "tenants: []\n"; + return out.str(); + } + out << "tenants:\n"; + for (const auto& [tenant_id, quota] : snapshot.tenant_quotas) { + out << " - name: " << QuoteYamlDoubleQuotedScalar(tenant_id) << "\n"; + out << " quota: " << quota << "\n"; + } + return out.str(); +} + +YamlTenantQuotaPolicyStore::YamlTenantQuotaPolicyStore(std::string path) + : path_(std::move(path)) {} + +tl::expected +YamlTenantQuotaPolicyStore::Load() { + std::lock_guard lock(mutex_); + std::ifstream input(path_); + if (!input.is_open()) { + return tl::make_unexpected("failed to open tenant quota policy file '" + + path_ + "'"); + } + std::ostringstream buffer; + buffer << input.rdbuf(); + if (input.bad()) { + return tl::make_unexpected("failed to read tenant quota policy file '" + + path_ + "'"); + } + return ParseTenantQuotaPolicyYaml(buffer.str()); +} + +tl::expected YamlTenantQuotaPolicyStore::Save( + const TenantQuotaPolicySnapshot& snapshot) { + std::lock_guard lock(mutex_); + const std::string content = FormatTenantQuotaPolicyYaml(snapshot); + const std::string tmp_path = MakeTempPath(path_); + + int fd = ::open(tmp_path.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644); + if (fd < 0) { + return tl::make_unexpected(ErrnoMessage("open", tmp_path)); + } + + auto cleanup = [&] { + ::close(fd); + ::unlink(tmp_path.c_str()); + }; + + auto write_result = WriteAll(fd, content, tmp_path); + if (!write_result) { + cleanup(); + return tl::make_unexpected(write_result.error()); + } + if (::fsync(fd) != 0) { + std::string error = ErrnoMessage("fsync", tmp_path); + cleanup(); + return tl::make_unexpected(error); + } + if (::close(fd) != 0) { + std::string error = ErrnoMessage("close", tmp_path); + ::unlink(tmp_path.c_str()); + return tl::make_unexpected(error); + } + fd = -1; + + if (::rename(tmp_path.c_str(), path_.c_str()) != 0) { + std::string error = ErrnoMessage("rename", path_); + ::unlink(tmp_path.c_str()); + return tl::make_unexpected(error); + } + + auto fsync_result = FsyncDirectory(path_); + if (!fsync_result) { + LOG(WARNING) << "failed to fsync tenant quota policy directory after " + "rename: " + << fsync_result.error(); + } + return {}; +} + +tl::expected, std::string> +CreateTenantQuotaPolicyStore(const std::string& type, const std::string& uri) { + if (type != "file") { + return tl::make_unexpected("unsupported tenant quota connector type '" + + type + "'"); + } + if (uri.empty()) { + return tl::make_unexpected( + "tenant quota file connector requires a non-empty uri"); + } + return std::make_unique(uri); +} + +} // namespace mooncake diff --git a/mooncake-store/src/types.cpp b/mooncake-store/src/types.cpp index db39474eb8..68bdd7fcb7 100644 --- a/mooncake-store/src/types.cpp +++ b/mooncake-store/src/types.cpp @@ -74,7 +74,9 @@ const std::string& toString(ErrorCode errorCode) noexcept { {ErrorCode::DFS_PERMISSION_DENIED, "DFS_PERMISSION_DENIED"}, {ErrorCode::DFS_STALE_HANDLE, "DFS_STALE_HANDLE"}, {ErrorCode::DFS_PARTIAL_WRITE, "DFS_PARTIAL_WRITE"}, - {ErrorCode::TENANT_QUOTA_EXCEEDED, "TENANT_QUOTA_EXCEEDED"}}; + {ErrorCode::TENANT_QUOTA_EXCEEDED, "TENANT_QUOTA_EXCEEDED"}, + {ErrorCode::TENANT_NOT_REGISTERED, "TENANT_NOT_REGISTERED"}, + {ErrorCode::TENANT_NOT_EMPTY, "TENANT_NOT_EMPTY"}}; auto it = errorCodeMap.find(errorCode); static const std::string unknownError = "UNKNOWN_ERROR"; diff --git a/mooncake-store/tests/master_admin_server_test.cpp b/mooncake-store/tests/master_admin_server_test.cpp index 9365dced9c..5ef0cd62a7 100644 --- a/mooncake-store/tests/master_admin_server_test.cpp +++ b/mooncake-store/tests/master_admin_server_test.cpp @@ -3,6 +3,9 @@ #include #include +#include +#include +#include #include #include #include @@ -15,6 +18,7 @@ #include "master_admin_service.h" #include "master_config.h" #include "rpc_service.h" +#include "tenant_quota_policy_store.h" #include "types.h" #include "utils.h" @@ -82,6 +86,18 @@ struct HttpSegmentsDetailResponse { }; YLT_REFL(HttpSegmentsDetailResponse, total_segments); +std::string WriteTenantQuotaPolicyForTest( + const std::map& tenant_quotas) { + TenantQuotaPolicySnapshot snapshot; + snapshot.tenant_quotas = tenant_quotas; + auto path = std::filesystem::temp_directory_path() / + ("mooncake_admin_tenant_quota_" + + UuidToString(generate_uuid()) + ".yaml"); + std::ofstream out(path); + out << FormatTenantQuotaPolicyYaml(snapshot); + return path.string(); +} + } // namespace // ========================================================================= @@ -477,12 +493,13 @@ TEST_F(MasterAdminServerTest, ServiceEndpointsReturn503WhenServiceUnavailable) { } TEST_F(MasterAdminServerTest, TenantQuotaAdminLifecycleEndpoints) { + const std::string policy_path = WriteTenantQuotaPolicyForTest({}); WrappedMasterServiceConfig svc_config; svc_config.default_kv_lease_ttl = 5000; svc_config.enable_metric_reporting = false; - svc_config.enable_tenant_quota = true; - svc_config.default_tenant_quota_bytes = 1000; - svc_config.tenant_quota_pool_capacity_bytes = 2000; + svc_config.enable_multi_tenants = true; + svc_config.tenant_quota_connector_type = "file"; + svc_config.tenant_quota_connector_uri = policy_path; auto service = std::make_shared(svc_config); Segment segment; @@ -500,17 +517,6 @@ TEST_F(MasterAdminServerTest, TenantQuotaAdminLifecycleEndpoints) { admin.SetServiceDelegate(service); admin.SetServiceAvailable(true); - auto default_get = HttpGet(port, "/api/v1/tenant_quotas/default"); - EXPECT_EQ(default_get.http_status, 200); - EXPECT_NE(default_get.body.find("\"requested_quota_bytes\":1000"), - std::string::npos); - - auto default_put = HttpPutJson(port, "/api/v1/tenant_quotas/default", - "{\"requested_quota_bytes\":0}"); - EXPECT_EQ(default_put.http_status, 200); - EXPECT_NE(default_put.body.find("\"requested_quota_bytes\":0"), - std::string::npos); - auto upsert = HttpPutJson(port, "/api/v1/tenant_quotas?tenant_id=tenant-a", "{\"requested_quota_bytes\":800}"); EXPECT_EQ(upsert.http_status, 200); @@ -532,6 +538,25 @@ TEST_F(MasterAdminServerTest, TenantQuotaAdminLifecycleEndpoints) { EXPECT_NE(one.body.find("\"committed_count\":0"), std::string::npos); EXPECT_NE(one.body.find("\"over_quota\":false"), std::string::npos); + ReplicateConfig cfg; + cfg.replica_num = 1; + auto put = + service->PutStart(client_id, "quota_admin_key", 100, cfg, "tenant-a"); + ASSERT_TRUE(put.has_value()) << toString(put.error()); + ASSERT_TRUE(service + ->PutEnd(client_id, "quota_admin_key", ReplicaType::MEMORY, + "tenant-a") + .has_value()); + + auto delete_non_empty = + HttpDelete(port, "/api/v1/tenant_quotas?tenant_id=tenant-a"); + EXPECT_EQ(delete_non_empty.http_status, 409); + EXPECT_NE(delete_non_empty.body.find("TENANT_NOT_EMPTY"), + std::string::npos); + + ASSERT_TRUE(service->Remove("quota_admin_key", /*force=*/true, "tenant-a") + .has_value()); + auto deleted = HttpDelete(port, "/api/v1/tenant_quotas?tenant_id=tenant-a"); EXPECT_EQ(deleted.http_status, 200); @@ -539,15 +564,17 @@ TEST_F(MasterAdminServerTest, TenantQuotaAdminLifecycleEndpoints) { EXPECT_EQ(missing.http_status, 404); admin.Stop(); + std::filesystem::remove(policy_path); } TEST_F(MasterAdminServerTest, TenantQuotaAdminValidationErrors) { + const std::string policy_path = WriteTenantQuotaPolicyForTest({}); WrappedMasterServiceConfig svc_config; svc_config.default_kv_lease_ttl = 5000; svc_config.enable_metric_reporting = false; - svc_config.enable_tenant_quota = true; - svc_config.default_tenant_quota_bytes = 1000; - svc_config.tenant_quota_pool_capacity_bytes = 1000; + svc_config.enable_multi_tenants = true; + svc_config.tenant_quota_connector_type = "file"; + svc_config.tenant_quota_connector_uri = policy_path; auto service = std::make_shared(svc_config); int port = getFreeTcpPort(); @@ -579,13 +606,14 @@ TEST_F(MasterAdminServerTest, TenantQuotaAdminValidationErrors) { EXPECT_EQ(missing_query.http_status, 404); admin.Stop(); + std::filesystem::remove(policy_path); } TEST_F(MasterAdminServerTest, TenantQuotaAdminDisabledModeReturns409) { WrappedMasterServiceConfig svc_config; svc_config.default_kv_lease_ttl = 5000; svc_config.enable_metric_reporting = false; - svc_config.enable_tenant_quota = false; + svc_config.enable_multi_tenants = false; auto service = std::make_shared(svc_config); int port = getFreeTcpPort(); diff --git a/mooncake-store/tests/master_service_tenant_quota_test.cpp b/mooncake-store/tests/master_service_tenant_quota_test.cpp index abc8603676..3f3257ca81 100644 --- a/mooncake-store/tests/master_service_tenant_quota_test.cpp +++ b/mooncake-store/tests/master_service_tenant_quota_test.cpp @@ -1,30 +1,148 @@ #include "master_service.h" +#include +#include +#include +#include +#include +#include +#include #include +#include +#include #include -#include +#include #include #include +#include +#include "allocation_strategy.h" +#include "tenant_quota_policy_store.h" #include "types.h" namespace mooncake::test { +class BlockingTenantQuotaPolicyStore final : public TenantQuotaPolicyStore { + public: + explicit BlockingTenantQuotaPolicyStore(TenantQuotaPolicySnapshot snapshot) + : snapshot_(std::move(snapshot)), + allow_save_(allow_save_promise_.get_future()) {} + + std::future SaveStarted() { + return save_started_promise_.get_future(); + } + + void AllowSave() { allow_save_promise_.set_value(); } + + tl::expected Load() override { + return snapshot_; + } + + tl::expected Save( + const TenantQuotaPolicySnapshot& snapshot) override { + snapshot_ = snapshot; + save_started_promise_.set_value(); + allow_save_.wait(); + return {}; + } + + private: + TenantQuotaPolicySnapshot snapshot_; + std::promise save_started_promise_; + std::promise allow_save_promise_; + std::future allow_save_; +}; + +#ifdef USE_NOF +class BlockingAllocationStrategy final : public AllocationStrategy { + public: + BlockingAllocationStrategy() + : allow_allocation_(allow_allocation_promise_.get_future()) {} + + std::future AllocationStarted() { + return allocation_started_promise_.get_future(); + } + + void AllowAllocation() { allow_allocation_promise_.set_value(); } + + tl::expected, ErrorCode> Allocate( + const AllocatorManager& allocator_manager, const size_t slice_length, + const size_t replica_num, + const std::vector& preferred_segments, + const std::set& excluded_segments, + const ReplicaType replica_type) override { + BlockOnce(); + return delegate_.Allocate(allocator_manager, slice_length, replica_num, + preferred_segments, excluded_segments, + replica_type); + } + + tl::expected, ErrorCode> Allocate( + const AllocatorManager& allocator_manager, const size_t slice_length, + const size_t replica_num, + const std::vector& preferred_segments, + const std::set& excluded_segments, + const ReplicaType replica_type, + const SsdMetricsProvider* ssd_provider) override { + (void)ssd_provider; + return Allocate(allocator_manager, slice_length, replica_num, + preferred_segments, excluded_segments, replica_type); + } + + tl::expected AllocateFrom( + const AllocatorManager& allocator_manager, const size_t slice_length, + const std::string& segment_name) override { + return delegate_.AllocateFrom(allocator_manager, slice_length, + segment_name); + } + + private: + void BlockOnce() { + bool expected = true; + if (block_next_allocation_.compare_exchange_strong(expected, false)) { + allocation_started_promise_.set_value(); + allow_allocation_.wait(); + } + } + + RandomAllocationStrategy delegate_; + std::atomic block_next_allocation_{true}; + std::promise allocation_started_promise_; + std::promise allow_allocation_promise_; + std::future allow_allocation_; +}; +#endif + class MasterServiceTenantQuotaTest : public ::testing::Test { protected: static constexpr size_t kSegmentBase = 0x500000000; - MasterServiceConfig MakeConfig(uint64_t default_quota, - uint64_t pool_capacity, - bool enable_quota = true, - std::string root_fs_dir = "") { - return MasterServiceConfig::builder() - .set_root_fs_dir(root_fs_dir) - .set_enable_tenant_quota(enable_quota) - .set_default_tenant_quota_bytes(default_quota) - .set_tenant_quota_pool_capacity_bytes(pool_capacity) - .build(); + std::string WritePolicyFile( + const std::map& tenant_quotas) { + TenantQuotaPolicySnapshot snapshot; + snapshot.tenant_quotas = tenant_quotas; + auto path = + std::filesystem::temp_directory_path() / + ("mooncake_tenant_quota_test_" + std::to_string(::getpid()) + "_" + + std::to_string(next_policy_file_++) + ".yaml"); + std::ofstream out(path); + out << FormatTenantQuotaPolicyYaml(snapshot); + out.close(); + policy_files_.push_back(path.string()); + return path.string(); + } + + MasterServiceConfig MakeConfig( + const std::map& tenant_quotas, + bool enable_multi_tenants = true) { + auto builder = MasterServiceConfig::builder().set_enable_multi_tenants( + enable_multi_tenants); + if (enable_multi_tenants) { + builder.set_tenant_quota_connector_type("file") + .set_tenant_quota_connector_uri(WritePolicyFile(tenant_quotas)); + } + return builder.build(); } UUID MountSegment(MasterService& service, size_t size = 4096, @@ -39,85 +157,45 @@ class MasterServiceTenantQuotaTest : public ::testing::Test { UUID client_id = generate_uuid(); auto result = service.MountSegment(segment, client_id); - EXPECT_TRUE(result.has_value()); + EXPECT_TRUE(result.has_value()) << toString(result.error()); return client_id; } - ReplicateConfig MemoryConfig() { - ReplicateConfig config; - config.replica_num = 1; - return config; - } +#ifdef USE_NOF + UUID MountNoFSegment(MasterService& service, size_t size = 4096, + std::string name = "quota_nof_segment") { + NoFSegment segment; + segment.id = generate_uuid(); + segment.name = std::move(name); + segment.base = kSegmentBase + next_segment_offset_; + segment.size = size; + segment.te_endpoint = segment.name; + next_segment_offset_ += size + 4096; - ReplicateConfig HardPinnedMemoryConfig() { - ReplicateConfig config = MemoryConfig(); - config.with_hard_pin = true; - return config; + UUID client_id = generate_uuid(); + auto result = service.MountNoFSegment(segment, client_id); + EXPECT_TRUE(result.has_value()) << toString(result.error()); + return client_id; } +#endif - MasterServiceConfig MakeOffloadConfig(uint64_t default_quota, - uint64_t pool_capacity) { - auto config = MakeConfig(default_quota, pool_capacity); - config.enable_offload = true; - config.offload_on_evict = true; - config.promotion_on_hit = true; - config.promotion_admission_threshold = 1; - config.default_kv_lease_ttl = 0; + ReplicateConfig MemoryConfig() { + ReplicateConfig config; + config.replica_num = 1; return config; } void PutComplete(MasterService& service, const UUID& client_id, const std::string& key, const std::string& tenant_id, uint64_t size) { - PutComplete(service, client_id, key, tenant_id, size, MemoryConfig()); - } - - void PutComplete(MasterService& service, const UUID& client_id, - const std::string& key, const std::string& tenant_id, - uint64_t size, const ReplicateConfig& config) { - auto start = service.PutStart(client_id, key, tenant_id, size, config); + auto start = + service.PutStart(client_id, key, tenant_id, size, MemoryConfig()); ASSERT_TRUE(start.has_value()) << toString(start.error()); auto end = service.PutEnd(client_id, key, tenant_id, ReplicaType::MEMORY); ASSERT_TRUE(end.has_value()) << toString(end.error()); } - void MountLocalDiskSegment(MasterService& service, const UUID& client_id) { - auto mount = service.MountLocalDiskSegment(client_id, true); - ASSERT_TRUE(mount.has_value()) << toString(mount.error()); - } - - std::unordered_map DrainOffloadQueue( - MasterService& service, const UUID& client_id) { - auto result = service.OffloadObjectHeartbeat(client_id, true); - EXPECT_TRUE(result.has_value()); - std::unordered_map queued; - if (!result.has_value()) { - return queued; - } - for (const auto& task : result.value()) { - queued[task.key] = task.size; - } - return queued; - } - - void InjectLocalDiskReplica(MasterService& service, const UUID& client_id, - const std::string& key, - const std::string& tenant_id, int64_t size, - const std::string& transport_endpoint) { - std::vector tasks{ - OffloadTaskItem{.tenant_id = tenant_id, .key = key, .size = size}}; - StorageObjectMetadata metadata; - metadata.bucket_id = 0; - metadata.offset = 0; - metadata.key_size = static_cast(key.size()); - metadata.data_size = size; - metadata.transport_endpoint = transport_endpoint; - std::vector metadatas{metadata}; - auto result = service.NotifyOffloadSuccess(client_id, tasks, metadatas); - ASSERT_TRUE(result.has_value()) << toString(result.error()); - } - TenantQuotaSnapshot Snapshot(MasterService& service, const std::string& tenant_id) { auto snapshot = service.GetTenantQuotaSnapshotForTesting(tenant_id); @@ -125,519 +203,642 @@ class MasterServiceTenantQuotaTest : public ::testing::Test { return *snapshot; } - tl::expected ReserveQuota(MasterService& service, - const std::string& tenant_id, - uint64_t bytes) { - return service.ReserveTenantQuota(tenant_id, bytes); - } - - void CommitQuota(MasterService& service, const std::string& tenant_id, - uint64_t bytes) { - service.CommitTenantQuota(tenant_id, bytes); - } - - void AbortQuota(MasterService& service, const std::string& tenant_id, - uint64_t bytes) { - service.AbortTenantQuota(tenant_id, bytes); - } - - void ReleaseQuota(MasterService& service, const std::string& tenant_id, - uint64_t bytes) { - service.ReleaseTenantQuota(tenant_id, bytes); + void ReloadTenantQuotaPolicyFromStore(MasterService& service) { + service.LoadTenantQuotaPoliciesFromStoreOrThrow(); + service.RebuildTenantQuotaUsageFromMetadata(); } - void ReleaseQuotaPartial(MasterService& service, - const std::string& tenant_id, uint64_t bytes) { - service.ReleaseTenantQuotaPartial(tenant_id, bytes); + void ReplaceTenantQuotaPolicyStore( + MasterService& service, std::unique_ptr store) { + service.tenant_quota_policy_store_ = std::move(store); } - uint64_t ComputeQuotaDeficit(MasterService& service, - const std::string& tenant_id, uint64_t bytes) { - return service.ComputeTenantQuotaDeficit(tenant_id, bytes); + int64_t LocalDiskUsedBytes(MasterService& service, const UUID& client_id) { + auto access = service.segment_manager_.getLocalDiskSegmentAccess(); + auto& segments = access.getClientLocalDiskSegment(); + auto it = segments.find(client_id); + EXPECT_TRUE(it != segments.end()); + if (it == segments.end()) { + return -1; + } + return it->second->ssd_used_bytes.load(std::memory_order_relaxed); } - void RecomputeTenantQuotas(MasterService& service) { - service.RecomputeTenantEffectiveQuotas(); +#ifdef USE_NOF + void ReplaceAllocationStrategy( + MasterService& service, std::shared_ptr strategy) { + service.allocation_strategy_ = std::move(strategy); } +#endif - void BatchEvict(MasterService& service) { - service.BatchEvict(/*evict_ratio_target=*/1.0, - /*evict_ratio_lowerbound=*/1.0); + tl::expected ReserveTenantQuotaForTest( + MasterService& service, const std::string& tenant_id, uint64_t bytes) { + return service.ReserveTenantQuota(tenant_id, bytes); } - void SetExplicitTenantPolicy(MasterService& service, - const std::string& tenant_id, - uint64_t requested_quota_bytes) { - const auto normalized_tenant = NormalizeTenantId(tenant_id); - const auto shard_idx = - service.getTenantQuotaShardIndex(normalized_tenant); - auto& shard = service.tenant_quota_shards_[shard_idx]; - std::lock_guard lock(shard.mutex); - auto& state = shard.tenants[normalized_tenant]; - state.requested_quota_bytes = requested_quota_bytes; - state.effective_quota_bytes = requested_quota_bytes; - state.has_explicit_policy = true; - state.over_quota = false; + std::unique_lock LockSnapshotForTest( + MasterService& service) { + return std::unique_lock(service.snapshot_mutex_); } - void SetEmptyInheritedTenantState(MasterService& service, - const std::string& tenant_id) { - const auto normalized_tenant = NormalizeTenantId(tenant_id); - const auto shard_idx = - service.getTenantQuotaShardIndex(normalized_tenant); - auto& shard = service.tenant_quota_shards_[shard_idx]; - std::lock_guard lock(shard.mutex); - auto& state = shard.tenants[normalized_tenant]; - state.requested_quota_bytes = service.default_tenant_quota_bytes_; - state.effective_quota_bytes = service.default_tenant_quota_bytes_; - state.has_explicit_policy = false; - state.over_quota = false; + bool WaitForTenantQuotaPolicyMutexContention(MasterService& service) { + for (int i = 0; i < 500; ++i) { + if (!service.tenant_quota_policy_mutex_.try_lock()) { + return true; + } + service.tenant_quota_policy_mutex_.unlock(); + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + return false; } - void ExpectSameAccounting(const TenantQuotaSnapshot& before, - const TenantQuotaSnapshot& after) { - EXPECT_EQ(after.requested_quota_bytes, before.requested_quota_bytes); - EXPECT_EQ(after.effective_quota_bytes, before.effective_quota_bytes); - EXPECT_EQ(after.used_bytes, before.used_bytes); - EXPECT_EQ(after.reserved_bytes, before.reserved_bytes); - EXPECT_EQ(after.committed_count, before.committed_count); - EXPECT_EQ(after.has_explicit_policy, before.has_explicit_policy); - EXPECT_EQ(after.over_quota, before.over_quota); + void TearDown() override { + for (const auto& path : policy_files_) { + std::error_code ec; + std::filesystem::remove(path, ec); + } } size_t next_segment_offset_ = 0; + size_t next_policy_file_ = 0; + std::vector policy_files_; }; -TEST_F(MasterServiceTenantQuotaTest, DisabledPreservesLegacyPutRemove) { - MasterService service(MakeConfig(/*default_quota=*/128, - /*pool_capacity=*/128, - /*enable_quota=*/false)); - UUID client_id = MountSegment(service); - - PutComplete(service, client_id, "large", "tenant-a", 512); - EXPECT_TRUE( - service.Remove("large", "tenant-a", /*force=*/true).has_value()); - EXPECT_FALSE( - service.GetTenantQuotaSnapshotForTesting("tenant-a").has_value()); -} - -TEST_F(MasterServiceTenantQuotaTest, SameKeyDifferentTenantsIndependent) { - MasterService service(MakeConfig(/*default_quota=*/1000, - /*pool_capacity=*/2000)); - UUID client_id = MountSegment(service); +TEST_F(MasterServiceTenantQuotaTest, + SingleTenantModeCollapsesTenantsAndDisablesQuota) { + MasterService service(MakeConfig({}, /*enable_multi_tenants=*/false)); + UUID client_id = MountSegment(service, /*size=*/1024); PutComplete(service, client_id, "shared-key", "tenant-a", 800); - PutComplete(service, client_id, "shared-key", "tenant-b", 800); - EXPECT_EQ(Snapshot(service, "tenant-a").used_bytes, 800); - EXPECT_EQ(Snapshot(service, "tenant-b").used_bytes, 800); + EXPECT_TRUE(service.ExistKey("shared-key", "tenant-b").value()); + auto duplicate = service.PutStart(client_id, "shared-key", "tenant-b", 1, + MemoryConfig()); + ASSERT_FALSE(duplicate.has_value()); + EXPECT_EQ(duplicate.error(), ErrorCode::OBJECT_ALREADY_EXISTS); + EXPECT_TRUE(service + .Remove("shared-key", "tenant-b", + /*force=*/true) + .has_value()); + EXPECT_FALSE( + service.GetTenantQuotaSnapshotForTesting("tenant-a").has_value()); } -TEST_F(MasterServiceTenantQuotaTest, SameTenantSharesQuotaAcrossKeys) { - MasterService service(MakeConfig(/*default_quota=*/1000, - /*pool_capacity=*/1000)); +TEST_F(MasterServiceTenantQuotaTest, + MultiTenantModeRejectsEmptyUnregisteredAndImplicitDefaultWrites) { + MasterService service(MakeConfig({{"tenant-a", 1000}})); UUID client_id = MountSegment(service); - PutComplete(service, client_id, "key-a", "tenant-a", 600, - HardPinnedMemoryConfig()); - auto over_quota = - service.PutStart(client_id, "key-b", "tenant-a", 500, MemoryConfig()); + auto empty = service.PutStart(client_id, "empty", "", 10, MemoryConfig()); + ASSERT_FALSE(empty.has_value()); + EXPECT_EQ(empty.error(), ErrorCode::TENANT_NOT_REGISTERED); - ASSERT_FALSE(over_quota.has_value()); - EXPECT_EQ(over_quota.error(), ErrorCode::TENANT_QUOTA_EXCEEDED); - EXPECT_EQ(Snapshot(service, "tenant-a").used_bytes, 600); - EXPECT_EQ(Snapshot(service, "tenant-a").reserved_bytes, 0); -} + auto missing = + service.PutStart(client_id, "missing", "tenant-b", 10, MemoryConfig()); + ASSERT_FALSE(missing.has_value()); + EXPECT_EQ(missing.error(), ErrorCode::TENANT_NOT_REGISTERED); -TEST_F(MasterServiceTenantQuotaTest, FirstTenantPutStartUsesPoolCapacity) { - MasterService service(MakeConfig(/*default_quota=*/1000, - /*pool_capacity=*/100)); - UUID client_id = MountSegment(service); + auto implicit_default = service.PutStart(client_id, "default-key", + "default", 10, MemoryConfig()); + ASSERT_FALSE(implicit_default.has_value()); + EXPECT_EQ(implicit_default.error(), ErrorCode::TENANT_NOT_REGISTERED); - auto over_quota = - service.PutStart(client_id, "large", "tenant-a", 800, MemoryConfig()); + const std::string control_tenant("tenant\0bad", 10); + auto control = service.PutStart(client_id, "control-key", control_tenant, + 10, MemoryConfig()); + ASSERT_FALSE(control.has_value()); + EXPECT_EQ(control.error(), ErrorCode::TENANT_NOT_REGISTERED); - ASSERT_FALSE(over_quota.has_value()); - EXPECT_EQ(over_quota.error(), ErrorCode::TENANT_QUOTA_EXCEEDED); - EXPECT_FALSE( - service.GetTenantQuotaSnapshotForTesting("tenant-a").has_value()); + auto control_policy = service.UpsertTenantQuotaPolicy(control_tenant, 100); + ASSERT_FALSE(control_policy.has_value()); + EXPECT_EQ(control_policy.error(), ErrorCode::INVALID_PARAMS); - auto start = - service.PutStart(client_id, "small", "tenant-a", 80, MemoryConfig()); + auto register_default = service.UpsertTenantQuotaPolicy("default", 100); + ASSERT_TRUE(register_default.has_value()) + << toString(register_default.error()); + PutComplete(service, client_id, "registered-default", "default", 10); - ASSERT_TRUE(start.has_value()) << toString(start.error()); - auto snapshot = Snapshot(service, "tenant-a"); - EXPECT_EQ(snapshot.requested_quota_bytes, 1000); - EXPECT_EQ(snapshot.effective_quota_bytes, 100); - EXPECT_EQ(snapshot.used_bytes, 0); - EXPECT_EQ(snapshot.reserved_bytes, 80); - AbortQuota(service, "tenant-a", 80); + PutComplete(service, client_id, "ok", "tenant-a", 10); } TEST_F(MasterServiceTenantQuotaTest, - ZeroDefaultQuotaUsesRegisteredCapacityForDefaultTenant) { - MasterService service(MakeConfig(/*default_quota=*/0, - /*pool_capacity=*/0)); - MountSegment(service, /*size=*/4096); + MultiTenantModeRejectsUnregisteredOffloadSuccess) { + MasterService service(MakeConfig({{"tenant-a", 1000}})); + UUID client_id = MountSegment(service); - auto reserve = ReserveQuota(service, "tenant-a", 4096); + StorageObjectMetadata metadata; + metadata.data_size = 128; + metadata.transport_endpoint = "disk-endpoint"; + std::vector tasks{ + OffloadTaskItem{.tenant_id = "tenant-b", .key = "ghost", .size = 128}}; - ASSERT_TRUE(reserve.has_value()) << toString(reserve.error()); - auto snapshot = Snapshot(service, "tenant-a"); - EXPECT_EQ(snapshot.requested_quota_bytes, 0); - EXPECT_EQ(snapshot.effective_quota_bytes, 4096); - EXPECT_EQ(snapshot.reserved_bytes, 4096); -} + auto result = service.NotifyOffloadSuccess(client_id, tasks, {metadata}); -TEST_F(MasterServiceTenantQuotaTest, - OverQuotaReserveDoesNotChangeReservedBytes) { - MasterService service(MakeConfig(/*default_quota=*/100, - /*pool_capacity=*/100)); - MountSegment(service, /*size=*/100); - ASSERT_TRUE(ReserveQuota(service, "tenant-a", 80).has_value()); - auto before = Snapshot(service, "tenant-a"); - - auto reserve = ReserveQuota(service, "tenant-a", 30); - - ASSERT_FALSE(reserve.has_value()); - EXPECT_EQ(reserve.error(), ErrorCode::TENANT_QUOTA_EXCEEDED); - ExpectSameAccounting(before, Snapshot(service, "tenant-a")); + ASSERT_FALSE(result.has_value()); + EXPECT_EQ(result.error(), ErrorCode::TENANT_NOT_REGISTERED); + auto missing = service.ExistKey("ghost", "tenant-b"); + ASSERT_TRUE(missing.has_value()) << toString(missing.error()); + EXPECT_FALSE(missing.value()); } TEST_F(MasterServiceTenantQuotaTest, - QuotaDeficitReturnsZeroWhenCurrentDemandFits) { - MasterService service(MakeConfig(/*default_quota=*/1000, - /*pool_capacity=*/1000)); - MountSegment(service, /*size=*/1000); - ASSERT_TRUE(ReserveQuota(service, "tenant-a", 400).has_value()); - CommitQuota(service, "tenant-a", 400); - - EXPECT_EQ(ComputeQuotaDeficit(service, "tenant-a", 100), 0); - EXPECT_EQ(ComputeQuotaDeficit(service, "tenant-a", 700), 100); -} + MultiTenantModeAllowsRegisteredOffloadSuccess) { + MasterService service(MakeConfig({{"tenant-a", 1000}})); + UUID client_id = MountSegment(service); -TEST_F(MasterServiceTenantQuotaTest, - FirstOverQuotaReserveDoesNotCreateTenantState) { - MasterService service(MakeConfig(/*default_quota=*/100, - /*pool_capacity=*/100)); + StorageObjectMetadata metadata; + metadata.data_size = 128; + metadata.transport_endpoint = "disk-endpoint"; + std::vector tasks{ + OffloadTaskItem{.tenant_id = "tenant-a", .key = "cold", .size = 128}}; - auto reserve = ReserveQuota(service, "tenant-a", 101); + auto result = service.NotifyOffloadSuccess(client_id, tasks, {metadata}); - ASSERT_FALSE(reserve.has_value()); - EXPECT_EQ(reserve.error(), ErrorCode::TENANT_QUOTA_EXCEEDED); - EXPECT_FALSE( - service.GetTenantQuotaSnapshotForTesting("tenant-a").has_value()); + ASSERT_TRUE(result.has_value()) << toString(result.error()); + auto exists = service.ExistKey("cold", "tenant-a"); + ASSERT_TRUE(exists.has_value()) << toString(exists.error()); + EXPECT_TRUE(exists.value()); + EXPECT_EQ(Snapshot(service, "tenant-a").used_bytes, 0); } -TEST_F(MasterServiceTenantQuotaTest, RecomputePrunesEmptyInheritedTenantState) { - MasterService service(MakeConfig(/*default_quota=*/100, - /*pool_capacity=*/1000)); - SetEmptyInheritedTenantState(service, "tenant-a"); +TEST_F(MasterServiceTenantQuotaTest, + ConnectorPolicyReloadKeepsLocalDiskOnlyOrphanVisible) { + const std::string initial_policy = + WritePolicyFile({{"tenant-a", 1000}, {"tenant-b", 1000}}); + auto config = MasterServiceConfig::builder() + .set_enable_multi_tenants(true) + .set_tenant_quota_connector_type("file") + .set_tenant_quota_connector_uri(initial_policy) + .build(); + MasterService service(config); + UUID client_id = MountSegment(service); + + StorageObjectMetadata metadata; + metadata.data_size = 128; + metadata.transport_endpoint = "disk-endpoint"; + std::vector tasks{ + OffloadTaskItem{.tenant_id = "tenant-b", .key = "cold", .size = 128}}; ASSERT_TRUE( - service.GetTenantQuotaSnapshotForTesting("tenant-a").has_value()); - SetExplicitTenantPolicy(service, "tenant-explicit", 200); + service.NotifyOffloadSuccess(client_id, tasks, {metadata}).has_value()); - RecomputeTenantQuotas(service); + { + std::ofstream out(initial_policy); + TenantQuotaPolicySnapshot replacement; + replacement.tenant_quotas = {{"tenant-a", 1000}}; + out << FormatTenantQuotaPolicyYaml(replacement); + } + ReloadTenantQuotaPolicyFromStore(service); + + auto orphan = Snapshot(service, "tenant-b"); + EXPECT_FALSE(orphan.has_explicit_policy); + EXPECT_EQ(orphan.used_bytes, 0); + EXPECT_EQ(orphan.committed_count, 0); + EXPECT_EQ(orphan.metadata_object_count, 1); + EXPECT_TRUE(orphan.over_quota); + EXPECT_TRUE(service.Remove("cold", "tenant-b", /*force=*/true).has_value()); EXPECT_FALSE( - service.GetTenantQuotaSnapshotForTesting("tenant-a").has_value()); - auto explicit_snapshot = Snapshot(service, "tenant-explicit"); - EXPECT_TRUE(explicit_snapshot.has_explicit_policy); - EXPECT_EQ(explicit_snapshot.requested_quota_bytes, 200); + service.GetTenantQuotaSnapshotForTesting("tenant-b").has_value()); } TEST_F(MasterServiceTenantQuotaTest, - AbortPrunesInactiveInheritedTenantAndRecomputesQuotas) { - MasterService service(MakeConfig(/*default_quota=*/1000, - /*pool_capacity=*/100)); - MountSegment(service, /*size=*/100); + NotifyOffloadSuccessCompletesExistingOrphanObject) { + const std::string initial_policy = + WritePolicyFile({{"tenant-a", 1000}, {"tenant-b", 1000}}); + auto config = MasterServiceConfig::builder() + .set_enable_multi_tenants(true) + .set_enable_offload(true) + .set_tenant_quota_connector_type("file") + .set_tenant_quota_connector_uri(initial_policy) + .build(); + MasterService service(config); + UUID client_id = MountSegment(service); + ASSERT_TRUE(service.MountLocalDiskSegment(client_id, true).has_value()); + PutComplete(service, client_id, "warming", "tenant-b", 128); + + { + std::ofstream out(initial_policy); + TenantQuotaPolicySnapshot replacement; + replacement.tenant_quotas = {{"tenant-a", 1000}}; + out << FormatTenantQuotaPolicyYaml(replacement); + } + ReloadTenantQuotaPolicyFromStore(service); + EXPECT_FALSE(Snapshot(service, "tenant-b").has_explicit_policy); - ASSERT_TRUE(ReserveQuota(service, "tenant-a", 50).has_value()); - ASSERT_TRUE(ReserveQuota(service, "tenant-b", 40).has_value()); - EXPECT_EQ(Snapshot(service, "tenant-b").effective_quota_bytes, 50); + StorageObjectMetadata metadata; + metadata.data_size = 128; + metadata.transport_endpoint = "disk-endpoint"; + std::vector tasks{OffloadTaskItem{ + .tenant_id = "tenant-b", .key = "warming", .size = 128}}; - AbortQuota(service, "tenant-a", 50); + auto result = service.NotifyOffloadSuccess(client_id, tasks, {metadata}); - EXPECT_FALSE( - service.GetTenantQuotaSnapshotForTesting("tenant-a").has_value()); - EXPECT_EQ(Snapshot(service, "tenant-b").effective_quota_bytes, 100); - ASSERT_TRUE(ReserveQuota(service, "tenant-b", 60).has_value()); - EXPECT_EQ(Snapshot(service, "tenant-b").reserved_bytes, 100); - AbortQuota(service, "tenant-b", 100); + ASSERT_TRUE(result.has_value()) << toString(result.error()); + auto replicas = service.GetReplicaList("warming", "tenant-b"); + ASSERT_TRUE(replicas.has_value()) << toString(replicas.error()); + EXPECT_TRUE(std::any_of(replicas->replicas.begin(), + replicas->replicas.end(), + [](const Replica::Descriptor& replica) { + return replica.is_local_disk_replica(); + })); } TEST_F(MasterServiceTenantQuotaTest, - FullReleasePrunesInactiveInheritedTenantAndRecomputesQuotas) { - MasterService service(MakeConfig(/*default_quota=*/1000, - /*pool_capacity=*/100)); + NotifyOffloadSuccessRejectsOrphanObjectWithoutOffloadTask) { + const std::string initial_policy = + WritePolicyFile({{"tenant-a", 1000}, {"tenant-b", 1000}}); + auto config = MasterServiceConfig::builder() + .set_enable_multi_tenants(true) + .set_tenant_quota_connector_type("file") + .set_tenant_quota_connector_uri(initial_policy) + .build(); + MasterService service(config); UUID client_id = MountSegment(service); - PutComplete(service, client_id, "key-a", "tenant-a", 50); - PutComplete(service, client_id, "key-b", "tenant-b", 40); - EXPECT_EQ(Snapshot(service, "tenant-b").effective_quota_bytes, 50); + PutComplete(service, client_id, "warming", "tenant-b", 128); - ASSERT_TRUE( - service.Remove("key-a", "tenant-a", /*force=*/true).has_value()); + { + std::ofstream out(initial_policy); + TenantQuotaPolicySnapshot replacement; + replacement.tenant_quotas = {{"tenant-a", 1000}}; + out << FormatTenantQuotaPolicyYaml(replacement); + } + ReloadTenantQuotaPolicyFromStore(service); + EXPECT_FALSE(Snapshot(service, "tenant-b").has_explicit_policy); - EXPECT_FALSE( - service.GetTenantQuotaSnapshotForTesting("tenant-a").has_value()); - EXPECT_EQ(Snapshot(service, "tenant-b").effective_quota_bytes, 100); - auto reserve = service.PutStart(client_id, "key-b-extra", "tenant-b", 60, - MemoryConfig()); - ASSERT_TRUE(reserve.has_value()) << toString(reserve.error()); - EXPECT_EQ(Snapshot(service, "tenant-b").reserved_bytes, 60); - AbortQuota(service, "tenant-b", 60); -} + StorageObjectMetadata metadata; + metadata.data_size = 128; + metadata.transport_endpoint = "disk-endpoint"; + std::vector tasks{OffloadTaskItem{ + .tenant_id = "tenant-b", .key = "warming", .size = 128}}; -TEST_F(MasterServiceTenantQuotaTest, CommitMismatchDoesNotMutateAccounting) { - MasterService service(MakeConfig(/*default_quota=*/100, - /*pool_capacity=*/100)); - MountSegment(service, /*size=*/100); - ASSERT_TRUE(ReserveQuota(service, "tenant-a", 40).has_value()); - auto before = Snapshot(service, "tenant-a"); + auto result = service.NotifyOffloadSuccess(client_id, tasks, {metadata}); - CommitQuota(service, "tenant-a", 50); + ASSERT_FALSE(result.has_value()); + EXPECT_EQ(result.error(), ErrorCode::TENANT_NOT_REGISTERED); +} - ExpectSameAccounting(before, Snapshot(service, "tenant-a")); +TEST_F(MasterServiceTenantQuotaTest, + NotifyOffloadSuccessDoesNotCountAddReplicaUpdateAsNewDiskUsage) { + MasterService service(MakeConfig({{"tenant-a", 1000}})); + UUID client_a = MountSegment(service, 4096, "quota_segment_a"); + UUID client_b = MountSegment(service, 4096, "quota_segment_b"); + ASSERT_TRUE(service.MountLocalDiskSegment(client_a, true).has_value()); + ASSERT_TRUE(service.MountLocalDiskSegment(client_b, true).has_value()); + + StorageObjectMetadata first_metadata; + first_metadata.data_size = 128; + first_metadata.transport_endpoint = "disk-endpoint-a"; + std::vector tasks{ + OffloadTaskItem{.tenant_id = "tenant-a", .key = "cold", .size = 128}}; + ASSERT_TRUE(service.NotifyOffloadSuccess(client_a, tasks, {first_metadata}) + .has_value()); + EXPECT_EQ(LocalDiskUsedBytes(service, client_a), 128); + EXPECT_EQ(LocalDiskUsedBytes(service, client_b), 0); + + StorageObjectMetadata second_metadata; + second_metadata.data_size = 128; + second_metadata.transport_endpoint = "disk-endpoint-b"; + auto result = + service.NotifyOffloadSuccess(client_b, tasks, {second_metadata}); + + ASSERT_TRUE(result.has_value()) << toString(result.error()); + EXPECT_EQ(LocalDiskUsedBytes(service, client_a), 128); + EXPECT_EQ(LocalDiskUsedBytes(service, client_b), 0); } -TEST_F(MasterServiceTenantQuotaTest, AbortMismatchDoesNotMutateAccounting) { - MasterService service(MakeConfig(/*default_quota=*/100, - /*pool_capacity=*/100)); - MountSegment(service, /*size=*/100); - ASSERT_TRUE(ReserveQuota(service, "tenant-a", 40).has_value()); - auto before = Snapshot(service, "tenant-a"); +TEST_F(MasterServiceTenantQuotaTest, + RegisteredTenantQuotaAdmissionDoesNotCreateImplicitTenants) { + MasterService service(MakeConfig({{"tenant-a", 100}})); + UUID client_id = MountSegment(service); + + auto hard_pinned = MemoryConfig(); + hard_pinned.with_hard_pin = true; + auto first = + service.PutStart(client_id, "key-a", "tenant-a", 80, hard_pinned); + ASSERT_TRUE(first.has_value()) << toString(first.error()); + ASSERT_TRUE( + service.PutEnd(client_id, "key-a", "tenant-a", ReplicaType::MEMORY) + .has_value()); - AbortQuota(service, "tenant-a", 50); + auto over = + service.PutStart(client_id, "key-b", "tenant-a", 30, MemoryConfig()); - ExpectSameAccounting(before, Snapshot(service, "tenant-a")); + ASSERT_FALSE(over.has_value()); + EXPECT_EQ(over.error(), ErrorCode::TENANT_QUOTA_EXCEEDED); + EXPECT_EQ(Snapshot(service, "tenant-a").used_bytes, 80); + EXPECT_FALSE( + service.GetTenantQuotaSnapshotForTesting("tenant-b").has_value()); } -TEST_F(MasterServiceTenantQuotaTest, ReleaseMismatchDoesNotMutateAccounting) { - MasterService service(MakeConfig(/*default_quota=*/100, - /*pool_capacity=*/100)); - MountSegment(service, /*size=*/100); - ASSERT_TRUE(ReserveQuota(service, "tenant-a", 40).has_value()); - CommitQuota(service, "tenant-a", 40); - auto before = Snapshot(service, "tenant-a"); +TEST_F(MasterServiceTenantQuotaTest, CopyStartRequiresQuotaForNewReplica) { + MasterService service(MakeConfig({{"tenant-a", 150}})); + UUID client_id = MountSegment(service, /*size=*/1024, "segment-a"); + MountSegment(service, /*size=*/1024, "segment-b"); - ReleaseQuota(service, "tenant-a", 50); + ReplicateConfig config = MemoryConfig(); + config.preferred_segment = "segment-a"; + auto put_start = + service.PutStart(client_id, "key", "tenant-a", 100, config); + ASSERT_TRUE(put_start.has_value()) << toString(put_start.error()); + ASSERT_TRUE( + service.PutEnd(client_id, "key", "tenant-a", ReplicaType::MEMORY) + .has_value()); - ExpectSameAccounting(before, Snapshot(service, "tenant-a")); + auto copy = service.CopyStart(client_id, "key", "tenant-a", "segment-a", + {"segment-b"}); + + ASSERT_FALSE(copy.has_value()); + EXPECT_EQ(copy.error(), ErrorCode::TENANT_QUOTA_EXCEEDED); + auto snapshot = Snapshot(service, "tenant-a"); + EXPECT_EQ(snapshot.used_bytes, 100); + EXPECT_EQ(snapshot.reserved_bytes, 0); + EXPECT_EQ(snapshot.committed_count, 1); } TEST_F(MasterServiceTenantQuotaTest, - ReleasePartialMismatchDoesNotMutateAccounting) { - MasterService service(MakeConfig(/*default_quota=*/100, - /*pool_capacity=*/100)); - MountSegment(service, /*size=*/100); - ASSERT_TRUE(ReserveQuota(service, "tenant-a", 40).has_value()); - CommitQuota(service, "tenant-a", 40); - auto before = Snapshot(service, "tenant-a"); - - ReleaseQuotaPartial(service, "tenant-a", 50); - - ExpectSameAccounting(before, Snapshot(service, "tenant-a")); + CopyEndCommitsAdditionalReplicaWithoutExtraObjectCount) { + MasterService service(MakeConfig({{"tenant-a", 300}})); + UUID client_id = MountSegment(service, /*size=*/1024, "segment-a"); + MountSegment(service, /*size=*/1024, "segment-b"); + + ReplicateConfig config = MemoryConfig(); + config.preferred_segment = "segment-a"; + auto put_start = + service.PutStart(client_id, "key", "tenant-a", 100, config); + ASSERT_TRUE(put_start.has_value()) << toString(put_start.error()); + ASSERT_TRUE( + service.PutEnd(client_id, "key", "tenant-a", ReplicaType::MEMORY) + .has_value()); + + auto copy = service.CopyStart(client_id, "key", "tenant-a", "segment-a", + {"segment-b"}); + ASSERT_TRUE(copy.has_value()) << toString(copy.error()); + auto in_flight = Snapshot(service, "tenant-a"); + EXPECT_EQ(in_flight.used_bytes, 100); + EXPECT_EQ(in_flight.reserved_bytes, 100); + + ASSERT_TRUE(service.CopyEnd(client_id, "key", "tenant-a").has_value()); + auto completed = Snapshot(service, "tenant-a"); + EXPECT_EQ(completed.used_bytes, 200); + EXPECT_EQ(completed.reserved_bytes, 0); + EXPECT_EQ(completed.committed_count, 1); + EXPECT_EQ(completed.metadata_object_count, 1); } TEST_F(MasterServiceTenantQuotaTest, - RegisteredCapacityQuotaFailurePrecedesAllocatorFailure) { - MasterService service(MakeConfig(/*default_quota=*/4096, - /*pool_capacity=*/4096)); - UUID client_id = MountSegment(service, /*size=*/512); + MoveStartRequiresQuotaForTemporaryReplica) { + MasterService service(MakeConfig({{"tenant-a", 150}})); + UUID client_id = MountSegment(service, /*size=*/1024, "segment-a"); + MountSegment(service, /*size=*/1024, "segment-b"); + + ReplicateConfig config = MemoryConfig(); + config.preferred_segment = "segment-a"; + auto put_start = + service.PutStart(client_id, "key", "tenant-a", 100, config); + ASSERT_TRUE(put_start.has_value()) << toString(put_start.error()); + ASSERT_TRUE( + service.PutEnd(client_id, "key", "tenant-a", ReplicaType::MEMORY) + .has_value()); - auto result = service.PutStart(client_id, "too-large", "tenant-a", 1024, - MemoryConfig()); + auto move = service.MoveStart(client_id, "key", "tenant-a", "segment-a", + "segment-b"); - ASSERT_FALSE(result.has_value()); - EXPECT_EQ(result.error(), ErrorCode::TENANT_QUOTA_EXCEEDED); - EXPECT_FALSE( - service.GetTenantQuotaSnapshotForTesting("tenant-a").has_value()); + ASSERT_FALSE(move.has_value()); + EXPECT_EQ(move.error(), ErrorCode::TENANT_QUOTA_EXCEEDED); + auto snapshot = Snapshot(service, "tenant-a"); + EXPECT_EQ(snapshot.used_bytes, 100); + EXPECT_EQ(snapshot.reserved_bytes, 0); } -TEST_F(MasterServiceTenantQuotaTest, RemoveReleasesCommittedCharge) { - MasterService service(MakeConfig(/*default_quota=*/1000, - /*pool_capacity=*/1000)); +TEST_F(MasterServiceTenantQuotaTest, AdminDeleteRequiresEmptyTenant) { + MasterService service(MakeConfig({{"tenant-a", 1000}})); UUID client_id = MountSegment(service); + PutComplete(service, client_id, "key", "tenant-a", 100); - PutComplete(service, client_id, "key", "tenant-a", 400); - ASSERT_EQ(Snapshot(service, "tenant-a").used_bytes, 400); + auto empty_upsert = service.UpsertTenantQuotaPolicy("", 100); + ASSERT_FALSE(empty_upsert.has_value()); + EXPECT_EQ(empty_upsert.error(), ErrorCode::INVALID_PARAMS); - ASSERT_TRUE(service.Remove("key", "tenant-a", /*force=*/true).has_value()); - EXPECT_FALSE( - service.GetTenantQuotaSnapshotForTesting("tenant-a").has_value()); + auto empty_delete = service.DeleteTenantQuotaPolicy(""); + ASSERT_FALSE(empty_delete.has_value()); + EXPECT_EQ(empty_delete.error(), ErrorCode::INVALID_PARAMS); + + auto delete_non_empty = service.DeleteTenantQuotaPolicy("tenant-a"); + ASSERT_FALSE(delete_non_empty.has_value()); + EXPECT_EQ(delete_non_empty.error(), ErrorCode::TENANT_NOT_EMPTY); + + auto upsert = service.UpsertTenantQuotaPolicy("tenant-b", 100); + ASSERT_TRUE(upsert.has_value()) << toString(upsert.error()); + auto delete_empty = service.DeleteTenantQuotaPolicy("tenant-b"); + ASSERT_TRUE(delete_empty.has_value()) << toString(delete_empty.error()); + EXPECT_FALSE(delete_empty.value().has_value()); } TEST_F(MasterServiceTenantQuotaTest, - BatchEvictReleasesEvictedMemoryReplicaCharge) { - MasterService service(MakeOffloadConfig(/*default_quota=*/1000, - /*pool_capacity=*/1000)); - UUID client_id = - MountSegment(service, /*size=*/4096, "quota_evict_segment"); - MountLocalDiskSegment(service, client_id); + DeletePolicyBlocksValidatedReservationsBeforeConnectorSave) { + MasterService service(MakeConfig({{"tenant-a", 1000}})); + MountSegment(service); + + TenantQuotaPolicySnapshot current_policy; + current_policy.tenant_quotas = {{"tenant-a", 1000}}; + auto blocking_store = + std::make_unique(current_policy); + auto* blocking_store_ptr = blocking_store.get(); + auto save_started = blocking_store_ptr->SaveStarted(); + ReplaceTenantQuotaPolicyStore(service, std::move(blocking_store)); + + using DeleteResult = + tl::expected, ErrorCode>; + std::optional delete_result; + std::thread delete_thread([&] { + delete_result.emplace(service.DeleteTenantQuotaPolicy("tenant-a")); + }); + + if (save_started.wait_for(std::chrono::seconds(5)) != + std::future_status::ready) { + blocking_store_ptr->AllowSave(); + delete_thread.join(); + FAIL() << "timed out waiting for connector save"; + } + + auto reserve = ReserveTenantQuotaForTest(service, "tenant-a", 1); + EXPECT_FALSE(reserve.has_value()); + EXPECT_EQ(reserve.error(), ErrorCode::TENANT_NOT_REGISTERED); - PutComplete(service, client_id, "key", "tenant-a", 400); - InjectLocalDiskReplica(service, client_id, "key", "tenant-a", 400, - "quota_evict_segment"); - ASSERT_EQ(Snapshot(service, "tenant-a").used_bytes, 400); + auto zero_byte_reserve = ReserveTenantQuotaForTest(service, "tenant-a", 0); + EXPECT_FALSE(zero_byte_reserve.has_value()); + EXPECT_EQ(zero_byte_reserve.error(), ErrorCode::TENANT_NOT_REGISTERED); - BatchEvict(service); + blocking_store_ptr->AllowSave(); + delete_thread.join(); + ASSERT_TRUE(delete_result.has_value()); + ASSERT_TRUE(delete_result->has_value()) << toString(delete_result->error()); + EXPECT_FALSE(delete_result->value().has_value()); EXPECT_FALSE( service.GetTenantQuotaSnapshotForTesting("tenant-a").has_value()); - auto reserve = service.PutStart(client_id, "after-evict", "tenant-a", 1000, - MemoryConfig()); - ASSERT_TRUE(reserve.has_value()) << toString(reserve.error()); - EXPECT_EQ(Snapshot(service, "tenant-a").reserved_bytes, 1000); - auto revoke = service.PutRevoke(client_id, "after-evict", "tenant-a", - ReplicaType::MEMORY); - ASSERT_TRUE(revoke.has_value()) << toString(revoke.error()); } TEST_F(MasterServiceTenantQuotaTest, - TenantQuotaEvictionQueuesOffloadOnEvictBeforeDeletingMemory) { - MasterService service(MakeOffloadConfig(/*default_quota=*/1000, - /*pool_capacity=*/1000)); - UUID client_id = - MountSegment(service, /*size=*/4096, "quota_offload_segment"); - MountLocalDiskSegment(service, client_id); - - PutComplete(service, client_id, "old", "tenant-a", 600); - EXPECT_TRUE(DrainOffloadQueue(service, client_id).empty()) - << "offload_on_evict should not queue objects at PutEnd"; - - auto start = - service.PutStart(client_id, "new", "tenant-a", 600, MemoryConfig()); - ASSERT_FALSE(start.has_value()); - EXPECT_EQ(start.error(), ErrorCode::TENANT_QUOTA_EXCEEDED); - - auto queued = DrainOffloadQueue(service, client_id); - ASSERT_EQ(queued.size(), 1u); - EXPECT_EQ(queued["old"], 600); - EXPECT_TRUE(service.GetReplicaList("old", "tenant-a").has_value()); - EXPECT_EQ(Snapshot(service, "tenant-a").used_bytes, 600); - - InjectLocalDiskReplica(service, client_id, "old", "tenant-a", 600, - "quota_offload_segment"); - auto retry = - service.PutStart(client_id, "new", "tenant-a", 600, MemoryConfig()); - ASSERT_TRUE(retry.has_value()) << toString(retry.error()); - EXPECT_TRUE(service.GetReplicaList("old", "tenant-a").has_value()); - auto revoke = - service.PutRevoke(client_id, "new", "tenant-a", ReplicaType::MEMORY); - ASSERT_TRUE(revoke.has_value()) << toString(revoke.error()); -} + DeletePolicyWaitsForInFlightAddReplicaBeforeEmptyCheck) { + MasterService service(MakeConfig({{"tenant-a", 1000}})); + UUID client_id = MountSegment(service); -TEST_F(MasterServiceTenantQuotaTest, PromotionSuccessCommitsTenantQuota) { - auto config = MakeOffloadConfig(/*default_quota=*/1000, - /*pool_capacity=*/1000); - config.default_kv_lease_ttl = 5000; - MasterService service(config); - UUID client_id = - MountSegment(service, /*size=*/4096, "quota_promotion_segment"); - MountLocalDiskSegment(service, client_id); - InjectLocalDiskReplica(service, client_id, "cold", "tenant-a", 400, - "quota_promotion_segment"); + TenantQuotaPolicySnapshot current_policy; + current_policy.tenant_quotas = {{"tenant-a", 1000}}; + auto blocking_store = + std::make_unique(current_policy); + auto* blocking_store_ptr = blocking_store.get(); + auto save_started = blocking_store_ptr->SaveStarted(); + ReplaceTenantQuotaPolicyStore(service, std::move(blocking_store)); + + auto snapshot_lock = LockSnapshotForTest(service); + std::optional> add_result; + std::thread add_thread([&] { + Replica replica(client_id, 128, "disk-endpoint", + ReplicaStatus::COMPLETE); + add_result.emplace( + service.AddReplica(client_id, "cold", "tenant-a", replica)); + }); + + if (!WaitForTenantQuotaPolicyMutexContention(service)) { + snapshot_lock.unlock(); + add_thread.join(); + FAIL() << "timed out waiting for AddReplica to enter tenant policy " + "critical section"; + } - auto replicas = service.GetReplicaList("cold", "tenant-a"); - ASSERT_TRUE(replicas.has_value()) << toString(replicas.error()); - auto alloc = service.PromotionAllocStart(client_id, "cold", "tenant-a", 400, - {"quota_promotion_segment"}); - ASSERT_TRUE(alloc.has_value()) << toString(alloc.error()); - EXPECT_EQ(Snapshot(service, "tenant-a").used_bytes, 0); - EXPECT_EQ(Snapshot(service, "tenant-a").reserved_bytes, 400); - - auto notify = service.NotifyPromotionSuccess(client_id, "cold", "tenant-a"); - ASSERT_TRUE(notify.has_value()) << toString(notify.error()); - - EXPECT_EQ(Snapshot(service, "tenant-a").used_bytes, 400); - EXPECT_EQ(Snapshot(service, "tenant-a").reserved_bytes, 0); - ASSERT_TRUE(service.GetReplicaList("cold", "tenant-a").has_value()); - auto over_quota = service.PutStart(client_id, "too-much", "tenant-a", 700, - MemoryConfig()); - ASSERT_FALSE(over_quota.has_value()); - EXPECT_EQ(over_quota.error(), ErrorCode::TENANT_QUOTA_EXCEEDED); + using DeleteResult = + tl::expected, ErrorCode>; + std::optional delete_result; + std::thread delete_thread([&] { + delete_result.emplace(service.DeleteTenantQuotaPolicy("tenant-a")); + }); + + const auto premature_save = + save_started.wait_for(std::chrono::milliseconds(200)); + if (premature_save == std::future_status::ready) { + blocking_store_ptr->AllowSave(); + } + snapshot_lock.unlock(); + add_thread.join(); + delete_thread.join(); + + ASSERT_EQ(premature_save, std::future_status::timeout) + << "tenant deletion reached connector save before in-flight " + "AddReplica completed"; + ASSERT_TRUE(add_result.has_value()); + ASSERT_TRUE(add_result->has_value()) << toString(add_result->error()); + ASSERT_TRUE(delete_result.has_value()); + ASSERT_FALSE(delete_result->has_value()); + EXPECT_EQ(delete_result->error(), ErrorCode::TENANT_NOT_EMPTY); + auto exists = service.ExistKey("cold", "tenant-a"); + ASSERT_TRUE(exists.has_value()) << toString(exists.error()); + EXPECT_TRUE(exists.value()); } -TEST_F(MasterServiceTenantQuotaTest, PromotionAllocStartRejectsOverQuota) { - MasterService service(MakeOffloadConfig(/*default_quota=*/300, - /*pool_capacity=*/300)); - UUID client_id = - MountSegment(service, /*size=*/4096, "quota_promotion_reject_segment"); - MountLocalDiskSegment(service, client_id); - InjectLocalDiskReplica(service, client_id, "cold", "tenant-a", 400, - "quota_promotion_reject_segment"); +#ifdef USE_NOF +TEST_F(MasterServiceTenantQuotaTest, + DeletePolicyWaitsForZeroChargePutStartMetadataCreate) { + MasterService service(MakeConfig({{"tenant-a", 1000}})); + UUID client_id = MountNoFSegment(service); + + auto blocking_strategy = std::make_shared(); + auto* blocking_strategy_ptr = blocking_strategy.get(); + auto allocation_started = blocking_strategy_ptr->AllocationStarted(); + ReplaceAllocationStrategy(service, std::move(blocking_strategy)); + + ReplicateConfig config; + config.replica_num = 0; + config.nof_replica_num = 1; + + std::optional, ErrorCode>> + put_result; + std::thread put_thread([&] { + put_result.emplace( + service.PutStart(client_id, "nof-key", "tenant-a", 128, config)); + }); + + if (allocation_started.wait_for(std::chrono::seconds(5)) != + std::future_status::ready) { + blocking_strategy_ptr->AllowAllocation(); + put_thread.join(); + FAIL() << "timed out waiting for PutStart allocation"; + } - auto replicas = service.GetReplicaList("cold", "tenant-a"); - ASSERT_TRUE(replicas.has_value()) << toString(replicas.error()); - auto alloc = service.PromotionAllocStart( - client_id, "cold", "tenant-a", 400, {"quota_promotion_reject_segment"}); + using DeleteResult = + tl::expected, ErrorCode>; + std::optional delete_result; + std::thread delete_thread([&] { + delete_result.emplace(service.DeleteTenantQuotaPolicy("tenant-a")); + }); - ASSERT_FALSE(alloc.has_value()); - EXPECT_EQ(alloc.error(), ErrorCode::TENANT_QUOTA_EXCEEDED); - EXPECT_FALSE( - service.GetTenantQuotaSnapshotForTesting("tenant-a").has_value()); -} + ASSERT_TRUE(WaitForTenantQuotaPolicyMutexContention(service)) + << "DeleteTenantQuotaPolicy did not wait for zero-charge PutStart"; -TEST_F(MasterServiceTenantQuotaTest, - DiskPutEndBeforeMemoryKeepsQuotaReservation) { - MasterService service( - MakeConfig(/*default_quota=*/1000, /*pool_capacity=*/1000, - /*enable_quota=*/true, /*root_fs_dir=*/"/tmp/mooncake")); - UUID client_id = MountSegment(service); + blocking_strategy_ptr->AllowAllocation(); + put_thread.join(); + delete_thread.join(); - auto start = - service.PutStart(client_id, "key", "tenant-a", 400, MemoryConfig()); - ASSERT_TRUE(start.has_value()) << toString(start.error()); - EXPECT_EQ(Snapshot(service, "tenant-a").used_bytes, 0); - EXPECT_EQ(Snapshot(service, "tenant-a").reserved_bytes, 400); - - auto disk_end = - service.PutEnd(client_id, "key", "tenant-a", ReplicaType::DISK); - ASSERT_TRUE(disk_end.has_value()) << toString(disk_end.error()); - EXPECT_EQ(Snapshot(service, "tenant-a").used_bytes, 0); - EXPECT_EQ(Snapshot(service, "tenant-a").reserved_bytes, 400); + ASSERT_TRUE(put_result.has_value()); + ASSERT_TRUE(put_result->has_value()) << toString(put_result->error()); + ASSERT_TRUE(delete_result.has_value()); + ASSERT_FALSE(delete_result->has_value()); + EXPECT_EQ(delete_result->error(), ErrorCode::TENANT_NOT_EMPTY); - auto memory_end = - service.PutEnd(client_id, "key", "tenant-a", ReplicaType::MEMORY); - ASSERT_TRUE(memory_end.has_value()) << toString(memory_end.error()); - EXPECT_EQ(Snapshot(service, "tenant-a").used_bytes, 400); - EXPECT_EQ(Snapshot(service, "tenant-a").reserved_bytes, 0); + auto snapshot = Snapshot(service, "tenant-a"); + EXPECT_EQ(snapshot.used_bytes, 0); + EXPECT_EQ(snapshot.reserved_bytes, 0); + EXPECT_EQ(snapshot.metadata_object_count, 1); } +#endif -TEST_F(MasterServiceTenantQuotaTest, ChangedSizeUpsertSuccessSwapsCharge) { - MasterService service(MakeConfig(/*default_quota=*/1000, - /*pool_capacity=*/1000)); - UUID client_id = MountSegment(service); +TEST_F(MasterServiceTenantQuotaTest, + EffectiveQuotaUsesOnlyExplicitPolicyAndScalesProportionally) { + MasterService service(MakeConfig({{"tenant-a", 200}, {"tenant-b", 400}})); + MountSegment(service, /*size=*/300); - PutComplete(service, client_id, "key", "tenant-a", 400); - auto start = - service.UpsertStart(client_id, "key", "tenant-a", 600, MemoryConfig()); - ASSERT_TRUE(start.has_value()) << toString(start.error()); - EXPECT_EQ(Snapshot(service, "tenant-a").used_bytes, 400); - EXPECT_EQ(Snapshot(service, "tenant-a").reserved_bytes, 600); - - auto end = - service.UpsertEnd(client_id, "key", "tenant-a", ReplicaType::MEMORY); - ASSERT_TRUE(end.has_value()) << toString(end.error()); - EXPECT_EQ(Snapshot(service, "tenant-a").used_bytes, 600); - EXPECT_EQ(Snapshot(service, "tenant-a").reserved_bytes, 0); + EXPECT_EQ(Snapshot(service, "tenant-a").effective_quota_bytes, 100); + EXPECT_EQ(Snapshot(service, "tenant-b").effective_quota_bytes, 200); } -TEST_F(MasterServiceTenantQuotaTest, ChangedSizeUpsertRevokeReleasesOldAndNew) { - MasterService service(MakeConfig(/*default_quota=*/1000, - /*pool_capacity=*/1000)); +TEST_F(MasterServiceTenantQuotaTest, + ConnectorPolicyReloadCreatesOrphanStateAndAllowsCleanup) { + const std::string initial_policy = + WritePolicyFile({{"tenant-a", 1000}, {"tenant-b", 1000}}); + auto config = MasterServiceConfig::builder() + .set_enable_multi_tenants(true) + .set_tenant_quota_connector_type("file") + .set_tenant_quota_connector_uri(initial_policy) + .build(); + MasterService service(config); UUID client_id = MountSegment(service); + PutComplete(service, client_id, "orphan-key", "tenant-b", 100); - PutComplete(service, client_id, "key", "tenant-a", 400); - auto start = - service.UpsertStart(client_id, "key", "tenant-a", 600, MemoryConfig()); - ASSERT_TRUE(start.has_value()) << toString(start.error()); - - auto revoke = - service.UpsertRevoke(client_id, "key", "tenant-a", ReplicaType::MEMORY); - ASSERT_TRUE(revoke.has_value()) << toString(revoke.error()); - EXPECT_FALSE( - service.GetTenantQuotaSnapshotForTesting("tenant-a").has_value()); + { + std::ofstream out(initial_policy); + TenantQuotaPolicySnapshot replacement; + replacement.tenant_quotas = {{"tenant-a", 1000}}; + out << FormatTenantQuotaPolicyYaml(replacement); + } + ReloadTenantQuotaPolicyFromStore(service); + + auto orphan = Snapshot(service, "tenant-b"); + EXPECT_FALSE(orphan.has_explicit_policy); + EXPECT_EQ(orphan.requested_quota_bytes, 0); + EXPECT_EQ(orphan.effective_quota_bytes, 0); + EXPECT_TRUE(orphan.over_quota); + + EXPECT_TRUE(service.GetReplicaList("orphan-key", "tenant-b").has_value()); + auto write = + service.PutStart(client_id, "new-key", "tenant-b", 1, MemoryConfig()); + ASSERT_FALSE(write.has_value()); + EXPECT_EQ(write.error(), ErrorCode::TENANT_NOT_REGISTERED); + + EXPECT_TRUE(service + .Remove("orphan-key", "tenant-b", + /*force=*/true) + .has_value()); } } // namespace mooncake::test diff --git a/mooncake-store/tests/master_service_test.cpp b/mooncake-store/tests/master_service_test.cpp index ad1c55e068..2822df7331 100644 --- a/mooncake-store/tests/master_service_test.cpp +++ b/mooncake-store/tests/master_service_test.cpp @@ -8,14 +8,21 @@ #include #include #include +#include +#include #include +#include #include #include +#include #include #include #include #include +#include + +#include "tenant_quota_policy_store.h" #include "types.h" namespace mooncake::test { @@ -34,6 +41,52 @@ class MasterServiceTest : public ::testing::Test { static constexpr size_t kDefaultSegmentBase = 0x300000000; static constexpr size_t kDefaultSegmentSize = 1024 * 1024 * 16; + static constexpr uint64_t kStrictTenantQuotaBytes = 4 * 1024 * 1024; + + std::string WriteTenantPolicyFile( + const std::map& tenant_quotas) { + TenantQuotaPolicySnapshot snapshot; + snapshot.tenant_quotas = tenant_quotas; + auto path = + std::filesystem::temp_directory_path() / + ("mooncake_master_service_test_" + std::to_string(::getpid()) + + "_" + std::to_string(next_policy_file_++) + ".yaml"); + std::ofstream out(path); + out << FormatTenantQuotaPolicyYaml(snapshot); + out.close(); + policy_files_.push_back(path.string()); + return path.string(); + } + + MasterServiceConfig MakeStrictTenantConfig( + const std::vector& tenants) { + std::map tenant_quotas; + for (const auto& tenant : tenants) { + tenant_quotas.emplace(tenant, kStrictTenantQuotaBytes); + } + return MasterServiceConfig::builder() + .set_enable_multi_tenants(true) + .set_tenant_quota_connector_type("file") + .set_tenant_quota_connector_uri( + WriteTenantPolicyFile(tenant_quotas)) + .build(); + } + + WrappedMasterServiceConfig MakeStrictWrappedConfig( + const std::vector& tenants) { + WrappedMasterServiceConfig config; + config.default_kv_lease_ttl = 100; + config.enable_metric_reporting = false; + config.enable_multi_tenants = true; + config.tenant_quota_connector_type = "file"; + std::map tenant_quotas; + for (const auto& tenant : tenants) { + tenant_quotas.emplace(tenant, kStrictTenantQuotaBytes); + } + config.tenant_quota_connector_uri = + WriteTenantPolicyFile(tenant_quotas); + return config; + } Segment MakeSegment(std::string name = "test_segment", size_t base = kDefaultSegmentBase, @@ -217,8 +270,16 @@ class MasterServiceTest : public ::testing::Test { } std::vector replica_list; + std::vector policy_files_; + size_t next_policy_file_ = 0; - void TearDown() override { google::ShutdownGoogleLogging(); } + void TearDown() override { + for (const auto& path : policy_files_) { + std::error_code ec; + std::filesystem::remove(path, ec); + } + google::ShutdownGoogleLogging(); + } }; TEST(TenantScopedStorageKeyTest, RoundTripsAndParsesLegacyKeys) { @@ -670,13 +731,14 @@ TEST_F(MasterServiceTest, GroupedObjectRoutesKeyLevelLookupAndRemove) { } TEST_F(MasterServiceTest, GroupRoutingIsTenantScopedForSameUserKey) { - std::unique_ptr service_(new MasterService()); - [[maybe_unused]] const auto context = PrepareSimpleSegment(*service_); - const UUID client_id = generate_uuid(); - const std::string key = "tenant_grouped_shared_user_key"; const std::string tenant_a = "tenant_group_route_a"; const std::string tenant_b = "tenant_group_route_b"; + auto service_ = std::make_unique( + MakeStrictTenantConfig({tenant_a, tenant_b})); + [[maybe_unused]] const auto context = PrepareSimpleSegment(*service_); + const UUID client_id = generate_uuid(); + const std::string group_a = FindGroupIdOnDifferentShard(key); std::string group_b; for (int i = 0; i < 10000; ++i) { @@ -763,13 +825,13 @@ TEST_F(MasterServiceTest, BatchGetReplicaListPreservesOrderWithGroupedKeys) { } TEST_F(MasterServiceTest, BatchGetReplicaListKeepsTenantIsolation) { - std::unique_ptr service_(new MasterService()); - [[maybe_unused]] const auto context = PrepareSimpleSegment(*service_); - const UUID client_id = generate_uuid(); - const std::string key = "batch_get_tenant_shared_key"; const std::string tenant_a = "batch_get_tenant_a"; const std::string tenant_b = "batch_get_tenant_b"; + auto service_ = std::make_unique( + MakeStrictTenantConfig({"default", tenant_a, tenant_b})); + [[maybe_unused]] const auto context = PrepareSimpleSegment(*service_); + const UUID client_id = generate_uuid(); ReplicateConfig config_a; config_a.replica_num = 1; @@ -801,10 +863,12 @@ TEST_F(MasterServiceTest, BatchGetReplicaListKeepsTenantIsolation) { } TEST_F(MasterServiceTest, GetAllKeysListsOnlyRequestedTenant) { - std::unique_ptr service_(new MasterService()); + const std::string tenant_a = "tenant_get_all_keys_a"; + auto service_ = std::make_unique( + MakeStrictTenantConfig({"default", tenant_a})); [[maybe_unused]] const auto context = PrepareSimpleSegment(*service_); const UUID client_id = generate_uuid(); - const std::string tenant_a = "tenant_get_all_keys_a"; + const std::string shared_key = "shared_listing_key"; const std::string default_only_key = "default_listing_key"; const std::string tenant_only_key = "tenant_listing_key"; @@ -1538,13 +1602,14 @@ TEST_F(MasterServiceTest, PutStartEndFlow) { } TEST_F(MasterServiceTest, TenantPutGetRemoveIsolatesSameUserKey) { - std::unique_ptr service_(new MasterService()); - [[maybe_unused]] const auto context = PrepareSimpleSegment(*service_); - const UUID client_id = generate_uuid(); - const std::string key = "shared_user_key"; const std::string tenant_a = "tenant_a"; const std::string tenant_b = "tenant_b"; + auto service_ = std::make_unique( + MakeStrictTenantConfig({"default", tenant_a, tenant_b})); + [[maybe_unused]] const auto context = PrepareSimpleSegment(*service_); + const UUID client_id = generate_uuid(); + ReplicateConfig config; config.replica_num = 1; @@ -1572,13 +1637,14 @@ TEST_F(MasterServiceTest, TenantPutGetRemoveIsolatesSameUserKey) { } TEST_F(MasterServiceTest, RegexOperationsAreTenantScoped) { - std::unique_ptr service_(new MasterService()); - [[maybe_unused]] const auto context = PrepareSimpleSegment(*service_); - const UUID client_id = generate_uuid(); - const std::string key = "regex_shared_key"; const std::string tenant_a = "tenant_regex_a"; const std::string tenant_b = "tenant_regex_b"; + auto service_ = std::make_unique( + MakeStrictTenantConfig({"default", tenant_a, tenant_b})); + [[maybe_unused]] const auto context = PrepareSimpleSegment(*service_); + const UUID client_id = generate_uuid(); + ReplicateConfig config; config.replica_num = 1; @@ -1617,15 +1683,15 @@ TEST_F(MasterServiceTest, RegexOperationsAreTenantScoped) { } TEST_F(MasterServiceTest, TenantBatchUpsertAndRevokeAreScoped) { - auto svc = std::make_unique(); - [[maybe_unused]] const auto context = PrepareSimpleSegment(*svc); - const UUID client_id = generate_uuid(); - const std::vector keys = {"tenant_batch_upsert_key_a", "tenant_batch_upsert_key_b"}; const std::vector sizes = {1024, 2048}; const std::string tenant_a = "tenant_batch_upsert_a"; const std::string tenant_b = "tenant_batch_upsert_b"; + auto svc = std::make_unique( + MakeStrictTenantConfig({"default", tenant_a, tenant_b})); + [[maybe_unused]] const auto context = PrepareSimpleSegment(*svc); + const UUID client_id = generate_uuid(); ReplicateConfig config; config.replica_num = 1; @@ -1671,13 +1737,13 @@ TEST_F(MasterServiceTest, TenantBatchUpsertAndRevokeAreScoped) { } TEST_F(MasterServiceTest, TenantBatchRemoveAndRemoveAllAreScoped) { - auto svc = std::make_unique(); - [[maybe_unused]] const auto context = PrepareSimpleSegment(*svc); - const UUID client_id = generate_uuid(); - const std::string shared_key = "tenant_batch_remove_shared_key"; const std::string tenant_a = "tenant_batch_remove_a"; const std::string tenant_b = "tenant_batch_remove_b"; + auto svc = std::make_unique( + MakeStrictTenantConfig({"default", tenant_a, tenant_b})); + [[maybe_unused]] const auto context = PrepareSimpleSegment(*svc); + const UUID client_id = generate_uuid(); ReplicateConfig config; config.replica_num = 1; @@ -1714,13 +1780,13 @@ TEST_F(MasterServiceTest, TenantBatchRemoveAndRemoveAllAreScoped) { } TEST_F(MasterServiceTest, LegacyRemoveAllRemovesAllTenants) { - auto svc = std::make_unique(); - [[maybe_unused]] const auto context = PrepareSimpleSegment(*svc); - const UUID client_id = generate_uuid(); - const std::string key = "legacy_remove_all_shared_key"; const std::string tenant_a = "legacy_remove_all_a"; const std::string tenant_b = "legacy_remove_all_b"; + auto svc = std::make_unique( + MakeStrictTenantConfig({"default", tenant_a, tenant_b})); + [[maybe_unused]] const auto context = PrepareSimpleSegment(*svc); + const UUID client_id = generate_uuid(); ReplicateConfig config; config.replica_num = 1; @@ -4437,7 +4503,9 @@ TEST_F(MasterServiceTest, BatchExistKeyGroupedAndIncompletePreservesOrder) { } TEST_F(MasterServiceTest, BatchExistKeyTenantAwarePreservesOrder) { - std::unique_ptr service_(new MasterService()); + const std::string tenant_id = "tenant_batch_exist"; + auto service_ = std::make_unique( + MakeStrictTenantConfig({"default", tenant_id})); const UUID client_id = generate_uuid(); constexpr size_t buffer = 0x300000000; @@ -4447,7 +4515,7 @@ TEST_F(MasterServiceTest, BatchExistKeyTenantAwarePreservesOrder) { ReplicateConfig config; config.replica_num = 1; - const std::string tenant_id = "tenant_batch_exist"; + const std::string tenant_only_key = "batch_tenant_only"; const std::string default_only_key = "batch_default_only"; const std::string incomplete_key = "batch_tenant_incomplete"; @@ -4479,9 +4547,8 @@ TEST_F(MasterServiceTest, BatchExistKeyTenantAwarePreservesOrder) { } TEST_F(MasterServiceTest, WrappedBatchExistKeyUsesTenantAwareBatchPath) { - WrappedMasterServiceConfig service_config; - service_config.default_kv_lease_ttl = 100; - service_config.enable_metric_reporting = false; + const std::string tenant_id = "wrapped_batch_exist_tenant"; + auto service_config = MakeStrictWrappedConfig({"default", tenant_id}); WrappedMasterService service_(service_config); Segment segment = MakeSegment("wrapped_batch_exist_segment"); @@ -4490,7 +4557,6 @@ TEST_F(MasterServiceTest, WrappedBatchExistKeyUsesTenantAwareBatchPath) { ReplicateConfig config; config.replica_num = 1; - const std::string tenant_id = "wrapped_batch_exist_tenant"; const std::string tenant_key_a = "wrapped_batch_tenant_a"; const std::string tenant_key_b = "wrapped_batch_tenant_b"; const std::string default_only_key = "wrapped_batch_default_only"; @@ -5657,7 +5723,9 @@ TEST_F(MasterServiceTest, FetchTasksReturnsAssignedTasksOnlyAndDrainsQueue) { } TEST_F(MasterServiceTest, TenantTasksCarryTenantInPayload) { - auto service = std::make_unique(); + const std::string tenant_id = "tenant_for_async_task"; + auto service = + std::make_unique(MakeStrictTenantConfig({tenant_id})); const auto ctx0 = PrepareSimpleSegment(*service, "segment_0", 0x300000000, kDefaultSegmentSize); [[maybe_unused]] const auto ctx1 = PrepareSimpleSegment( @@ -5665,7 +5733,6 @@ TEST_F(MasterServiceTest, TenantTasksCarryTenantInPayload) { const UUID put_client_id = generate_uuid(); const std::string key = "tenant_task_key"; - const std::string tenant_id = "tenant_for_async_task"; ReplicateConfig config; config.replica_num = 1; diff --git a/mooncake-store/tests/promotion_on_hit_test.cpp b/mooncake-store/tests/promotion_on_hit_test.cpp index b030f4574f..2743c4e252 100644 --- a/mooncake-store/tests/promotion_on_hit_test.cpp +++ b/mooncake-store/tests/promotion_on_hit_test.cpp @@ -9,11 +9,17 @@ #include #include +#include +#include +#include #include #include #include #include +#include + +#include "tenant_quota_policy_store.h" #include "types.h" namespace mooncake::test { @@ -32,7 +38,13 @@ class PromotionOnHitTest : public ::testing::Test { FLAGS_logtostderr = true; } - void TearDown() override { google::ShutdownGoogleLogging(); } + void TearDown() override { + for (const auto& path : policy_files_) { + std::error_code ec; + std::filesystem::remove(path, ec); + } + google::ShutdownGoogleLogging(); + } // Friend access to MasterService::promotion_admission_threshold_, which // is otherwise private. PromotionOnHitTest is friended; TEST_F-generated @@ -44,6 +56,21 @@ class PromotionOnHitTest : public ::testing::Test { static constexpr size_t kDefaultSegmentBase = 0x300000000; + std::string WriteTenantQuotaPolicyFile( + const std::map& tenant_quotas) { + TenantQuotaPolicySnapshot snapshot; + snapshot.tenant_quotas = tenant_quotas; + auto path = + std::filesystem::temp_directory_path() / + ("mooncake_promotion_tenant_policy_" + std::to_string(::getpid()) + + "_" + std::to_string(next_policy_file_++) + ".yaml"); + std::ofstream out(path); + out << FormatTenantQuotaPolicyYaml(snapshot); + out.close(); + policy_files_.push_back(path.string()); + return path.string(); + } + Segment MakeSegment(std::string name, size_t base, size_t size) const { Segment segment; segment.id = generate_uuid(); @@ -116,6 +143,9 @@ class PromotionOnHitTest : public ::testing::Test { EXPECT_TRUE(mount_ld.has_value()); return client_id; } + + std::vector policy_files_; + size_t next_policy_file_ = 0; }; // Sanity: with promotion disabled, no path mutates promotion_objects. @@ -2035,17 +2065,21 @@ TEST_F(PromotionOnHitTest, MetricsRejectionCountersIncrementOnGateMiss) { TEST_F(PromotionOnHitTest, AdmissionFrequencyIsTenantScoped) { MasterServiceConfig config; config.enable_offload = true; + config.enable_multi_tenants = true; + config.tenant_quota_connector_type = "file"; config.promotion_on_hit = true; config.promotion_admission_threshold = 2; config.default_kv_lease_ttl = 2000; + const std::string key = "shared_hot_key"; + const std::string tenant_a = "tenant_promotion_a"; + const std::string tenant_b = "tenant_promotion_b"; + config.tenant_quota_connector_uri = WriteTenantQuotaPolicyFile( + {{tenant_a, 64 * 1024 * 1024}, {tenant_b, 64 * 1024 * 1024}}); auto service = std::make_unique(config); constexpr size_t seg_size = 1024 * 1024 * 16; auto seg = PrepareSegment(*service, "seg_tenant", kDefaultSegmentBase, seg_size); - const std::string key = "shared_hot_key"; - const std::string tenant_a = "tenant_promotion_a"; - const std::string tenant_b = "tenant_promotion_b"; ASSERT_TRUE(InjectLocalDiskReplica(*service, seg.client_id, key, 1024, seg.segment_name, tenant_a)); ASSERT_TRUE(InjectLocalDiskReplica(*service, seg.client_id, key, 1024, diff --git a/mooncake-store/tests/tenant_quota_test.cpp b/mooncake-store/tests/tenant_quota_test.cpp index c61e67ccb2..7faba55aff 100644 --- a/mooncake-store/tests/tenant_quota_test.cpp +++ b/mooncake-store/tests/tenant_quota_test.cpp @@ -1,11 +1,16 @@ #include "tenant_quota.h" +#include "tenant_quota_policy_store.h" #include "types.h" +#include +#include #include +#include #include #include #include +#include namespace mooncake { namespace { @@ -25,25 +30,31 @@ uint64_t SumEffectiveQuotas(const TenantQuotaTable& table) { return sum; } -void MakeInheritedTenantActive(TenantQuotaTable* table, - const std::string& tenant_id, - uint64_t capacity) { - ASSERT_TRUE(table->UpsertTenantPolicy(tenant_id, capacity).has_value()); - table->RecomputeEffectiveQuotas(capacity); - ASSERT_TRUE(table->Reserve(tenant_id, 1).has_value()); - ASSERT_TRUE(table->Commit(tenant_id, 1).has_value()); +std::filesystem::path MakeTempPolicyPath(const std::string& suffix) { + return std::filesystem::temp_directory_path() / + ("mooncake_tenant_quota_policy_store_test_" + + std::to_string(::getpid()) + "_" + suffix + ".yaml"); +} + +void MakeOrphanTenant(TenantQuotaTable* table, const std::string& tenant_id, + uint64_t bytes) { + ASSERT_TRUE(table->UpsertTenantPolicy(tenant_id, bytes).has_value()); + table->RecomputeEffectiveQuotas(bytes); + ASSERT_TRUE(table->Reserve(tenant_id, bytes).has_value()); + ASSERT_TRUE(table->Commit(tenant_id, bytes).has_value()); table->EraseTenantPolicy(tenant_id); } -TEST(TenantQuotaTableTest, NormalizesEmptyTenantIdToDefault) { +TEST(TenantQuotaTableTest, NormalizesEmptyExplicitTenantIdToDefault) { TenantQuotaTable table; - EXPECT_EQ(NormalizeTenantId(""), "default"); - MakeInheritedTenantActive(&table, "", 1024); - table.RecomputeEffectiveQuotas(1024); + ASSERT_TRUE(table.UpsertTenantPolicy("", 1024).has_value()); + table.RecomputeEffectiveQuotas(4096); auto snapshot = Snapshot(table, ""); EXPECT_EQ(snapshot.tenant_id, "default"); + EXPECT_TRUE(snapshot.has_explicit_policy); + EXPECT_EQ(snapshot.requested_quota_bytes, 1024); EXPECT_EQ(snapshot.effective_quota_bytes, 1024); } @@ -60,26 +71,18 @@ TEST(TenantQuotaTableTest, RejectsZeroExplicitQuotaWithoutChangingState) { EXPECT_EQ(snapshot.requested_quota_bytes, 100); } -TEST(TenantQuotaTableTest, ExplicitPolicyOverridesDefaultAndEraseFallsBack) { +TEST(TenantQuotaTableTest, EraseExplicitPolicyCreatesOrphanState) { TenantQuotaTable table; - table.SetDefaultRequestedQuota(50); - ASSERT_TRUE(table.UpsertTenantPolicy("tenant-a", 100).has_value()); - table.RecomputeEffectiveQuotas(1000); - - EXPECT_TRUE(Snapshot(table, "tenant-a").has_explicit_policy); - EXPECT_EQ(Snapshot(table, "tenant-a").effective_quota_bytes, 100); - - ASSERT_TRUE(table.Reserve("tenant-a", 40).has_value()); - ASSERT_TRUE(table.Commit("tenant-a", 40).has_value()); - table.EraseTenantPolicy("tenant-a"); + MakeOrphanTenant(&table, "tenant-a", 40); table.RecomputeEffectiveQuotas(1000); auto snapshot = Snapshot(table, "tenant-a"); EXPECT_FALSE(snapshot.has_explicit_policy); - EXPECT_EQ(snapshot.requested_quota_bytes, 50); - EXPECT_EQ(snapshot.effective_quota_bytes, 1000); + EXPECT_EQ(snapshot.requested_quota_bytes, 0); + EXPECT_EQ(snapshot.effective_quota_bytes, 0); EXPECT_EQ(snapshot.used_bytes, 40); EXPECT_EQ(snapshot.committed_count, 1); + EXPECT_TRUE(snapshot.over_quota); } TEST(TenantQuotaTableTest, EraseMissingPolicyDoesNotCreateLazyState) { @@ -104,20 +107,6 @@ TEST(TenantQuotaTableTest, PolicyMutationDoesNotRecomputeEffectiveQuota) { EXPECT_EQ(Snapshot(table, "tenant-a").effective_quota_bytes, 200); } -TEST(TenantQuotaTableTest, - DefaultPolicyMutationDoesNotRecomputeEffectiveQuota) { - TenantQuotaTable table; - MakeInheritedTenantActive(&table, "default", 1000); - EXPECT_EQ(Snapshot(table, "default").effective_quota_bytes, 1000); - - table.SetDefaultRequestedQuota(100); - EXPECT_EQ(Snapshot(table, "default").requested_quota_bytes, 100); - EXPECT_EQ(Snapshot(table, "default").effective_quota_bytes, 1000); - - table.RecomputeEffectiveQuotas(500); - EXPECT_EQ(Snapshot(table, "default").effective_quota_bytes, 500); -} - TEST(TenantQuotaTableTest, ListSnapshotsSortedAndSkipsLazyEmptyTenants) { TenantQuotaTable table; ASSERT_TRUE(table.Reserve("z-empty", 0).has_value()); @@ -131,312 +120,140 @@ TEST(TenantQuotaTableTest, ListSnapshotsSortedAndSkipsLazyEmptyTenants) { EXPECT_EQ(snapshots[1].tenant_id, "b"); } -TEST(TenantQuotaTableTest, SingleDefaultTenantReceivesFullCapacity) { - TenantQuotaTable table; - MakeInheritedTenantActive(&table, "default", 1234); - - table.RecomputeEffectiveQuotas(1234); - - EXPECT_EQ(Snapshot(table, "default").effective_quota_bytes, 1234); -} - -TEST(TenantQuotaTableTest, - ExplicitTenantsGetRequestedAndDefaultSharesRemainder) { +TEST(TenantQuotaTableTest, ExplicitTenantsReceiveRequestedWhenCapacityFits) { TenantQuotaTable table; ASSERT_TRUE(table.UpsertTenantPolicy("tenant-a", 100).has_value()); - MakeInheritedTenantActive(&table, "default", 250); + ASSERT_TRUE(table.UpsertTenantPolicy("tenant-b", 200).has_value()); - table.RecomputeEffectiveQuotas(250); + table.RecomputeEffectiveQuotas(1000); EXPECT_EQ(Snapshot(table, "tenant-a").effective_quota_bytes, 100); - EXPECT_EQ(Snapshot(table, "default").effective_quota_bytes, 150); -} - -TEST(TenantQuotaTableTest, DefaultTenantsSplitRemainderWithTenantIdTieBreak) { - TenantQuotaTable table; - ASSERT_TRUE(table.UpsertTenantPolicy("explicit", 100).has_value()); - MakeInheritedTenantActive(&table, "b", 203); - MakeInheritedTenantActive(&table, "a", 203); - - table.RecomputeEffectiveQuotas(203); - - EXPECT_EQ(Snapshot(table, "explicit").effective_quota_bytes, 100); - EXPECT_EQ(Snapshot(table, "a").effective_quota_bytes, 52); - EXPECT_EQ(Snapshot(table, "b").effective_quota_bytes, 51); - EXPECT_LE(SumEffectiveQuotas(table), 203); + EXPECT_EQ(Snapshot(table, "tenant-b").effective_quota_bytes, 200); + EXPECT_EQ(SumEffectiveQuotas(table), 300); } TEST(TenantQuotaTableTest, OverCapacityScalesOnlyExplicitTenants) { TenantQuotaTable table; + MakeOrphanTenant(&table, "orphan", 20); ASSERT_TRUE(table.UpsertTenantPolicy("b", 200).has_value()); ASSERT_TRUE(table.UpsertTenantPolicy("a", 100).has_value()); - MakeInheritedTenantActive(&table, "default", 150); table.RecomputeEffectiveQuotas(150); EXPECT_EQ(Snapshot(table, "a").effective_quota_bytes, 50); EXPECT_EQ(Snapshot(table, "b").effective_quota_bytes, 100); - EXPECT_EQ(Snapshot(table, "default").effective_quota_bytes, 0); -} - -TEST(TenantQuotaTableTest, LeavesRemainderUnallocatedWithoutDefaultTenants) { - TenantQuotaTable table; - ASSERT_TRUE(table.UpsertTenantPolicy("tenant-a", 100).has_value()); - - table.RecomputeEffectiveQuotas(1000); - - EXPECT_EQ(Snapshot(table, "tenant-a").effective_quota_bytes, 100); - EXPECT_EQ(SumEffectiveQuotas(table), 100); + EXPECT_EQ(Snapshot(table, "orphan").effective_quota_bytes, 0); + EXPECT_TRUE(Snapshot(table, "orphan").over_quota); } -TEST(TenantQuotaTableTest, DefaultUnlimitedTenantDoesNotSqueezeExplicitQuota) { - TenantQuotaTable table; - ASSERT_TRUE(table.UpsertTenantPolicy("small", 1).has_value()); - MakeInheritedTenantActive(&table, "default", 10); - - table.RecomputeEffectiveQuotas(10); - - EXPECT_EQ(Snapshot(table, "small").effective_quota_bytes, 1); - EXPECT_EQ(Snapshot(table, "default").effective_quota_bytes, 9); -} - -TEST(TenantQuotaTableTest, LazyEmptyTenantsDoNotDiluteActiveDefaultTenant) { +TEST(TenantQuotaTableTest, LazyEmptyOrphansDoNotAppearInList) { TenantQuotaTable table; ASSERT_TRUE(table.UpsertTenantPolicy("team-a", 30).has_value()); - MakeInheritedTenantActive(&table, "default", 100); ASSERT_TRUE(table.UpsertTenantPolicy("ghost", 10).has_value()); table.EraseTenantPolicy("ghost"); table.RecomputeEffectiveQuotas(100); EXPECT_EQ(Snapshot(table, "team-a").effective_quota_bytes, 30); - EXPECT_EQ(Snapshot(table, "default").effective_quota_bytes, 70); EXPECT_EQ(Snapshot(table, "ghost").effective_quota_bytes, 0); auto snapshots = table.ListTenantSnapshots(); - ASSERT_EQ(snapshots.size(), 2); - EXPECT_EQ(snapshots[0].tenant_id, "default"); - EXPECT_EQ(snapshots[1].tenant_id, "team-a"); -} - -TEST(TenantQuotaTableTest, LargestRemainderTieBreakUsesTenantId) { - TenantQuotaTable table; - ASSERT_TRUE(table.UpsertTenantPolicy("b", 1).has_value()); - ASSERT_TRUE(table.UpsertTenantPolicy("a", 1).has_value()); - - table.RecomputeEffectiveQuotas(1); - - EXPECT_EQ(Snapshot(table, "a").effective_quota_bytes, 1); - EXPECT_EQ(Snapshot(table, "b").effective_quota_bytes, 0); -} - -TEST(TenantQuotaTableTest, CapacityShrinkAndGrowthRefreshOverQuota) { - TenantQuotaTable table; - ASSERT_TRUE(table.UpsertTenantPolicy("tenant-a", 100).has_value()); - table.RecomputeEffectiveQuotas(100); - ASSERT_TRUE(table.Reserve("tenant-a", 80).has_value()); - ASSERT_TRUE(table.Commit("tenant-a", 80).has_value()); - - table.RecomputeEffectiveQuotas(50); - EXPECT_TRUE(Snapshot(table, "tenant-a").over_quota); - - table.RecomputeEffectiveQuotas(100); - EXPECT_FALSE(Snapshot(table, "tenant-a").over_quota); -} - -TEST(TenantQuotaTableTest, LargeValuesDoNotOverflowDuringRecompute) { - TenantQuotaTable table; - const uint64_t max = std::numeric_limits::max(); - ASSERT_TRUE(table.UpsertTenantPolicy("a", max).has_value()); - ASSERT_TRUE(table.UpsertTenantPolicy("b", max).has_value()); - - table.RecomputeEffectiveQuotas(max); - - EXPECT_EQ(Snapshot(table, "a").effective_quota_bytes, max / 2 + max % 2); - EXPECT_EQ(Snapshot(table, "b").effective_quota_bytes, max / 2); - EXPECT_LE(SumEffectiveQuotas(table), max); + ASSERT_EQ(snapshots.size(), 1); + EXPECT_EQ(snapshots[0].tenant_id, "team-a"); } -TEST(TenantQuotaTableTest, ReserveCommitUpdatesAccounting) { - TenantQuotaTable table; - ASSERT_TRUE(table.UpsertTenantPolicy("tenant-a", 100).has_value()); - table.RecomputeEffectiveQuotas(100); - - ASSERT_TRUE(table.Reserve("tenant-a", 40).has_value()); - auto snapshot = Snapshot(table, "tenant-a"); - EXPECT_EQ(snapshot.reserved_bytes, 40); - EXPECT_EQ(snapshot.used_bytes, 0); - - ASSERT_TRUE(table.Commit("tenant-a", 40).has_value()); - snapshot = Snapshot(table, "tenant-a"); - EXPECT_EQ(snapshot.reserved_bytes, 0); - EXPECT_EQ(snapshot.used_bytes, 40); - EXPECT_EQ(snapshot.committed_count, 1); +TEST(TenantQuotaPolicyStoreTest, ParsesValidYamlUnits) { + const char* yaml = R"yaml( +version: 1 + +tenants: + - name: tenant-a + quota: 200GB + - name: tenant-b + quota: 500MB + - name: experiment + quota: 12345 +)yaml"; + + auto snapshot = ParseTenantQuotaPolicyYaml(yaml); + + ASSERT_TRUE(snapshot.has_value()) << snapshot.error(); + EXPECT_EQ(snapshot->tenant_quotas.at("tenant-a"), + 200ULL * 1024 * 1024 * 1024); + EXPECT_EQ(snapshot->tenant_quotas.at("tenant-b"), 500ULL * 1024 * 1024); + EXPECT_EQ(snapshot->tenant_quotas.at("experiment"), 12345); } -TEST(TenantQuotaTableTest, ReserveOverQuotaDoesNotModifyState) { - TenantQuotaTable table; - ASSERT_TRUE(table.UpsertTenantPolicy("tenant-a", 100).has_value()); - table.RecomputeEffectiveQuotas(100); - ASSERT_TRUE(table.Reserve("tenant-a", 80).has_value()); - - auto before = Snapshot(table, "tenant-a"); - auto result = table.Reserve("tenant-a", 21); - - ASSERT_FALSE(result.has_value()); - EXPECT_EQ(result.error(), TenantQuotaError::kQuotaExceeded); - auto after = Snapshot(table, "tenant-a"); - EXPECT_EQ(after.reserved_bytes, before.reserved_bytes); - EXPECT_EQ(after.used_bytes, before.used_bytes); - EXPECT_EQ(after.committed_count, before.committed_count); -} - -TEST(TenantQuotaTableTest, ReserveUsesOverflowSafeHeadroomCheck) { - TenantQuotaTable table; - const uint64_t max = std::numeric_limits::max(); - ASSERT_TRUE(table.UpsertTenantPolicy("tenant-a", max).has_value()); - table.RecomputeEffectiveQuotas(max); - ASSERT_TRUE(table.Reserve("tenant-a", max).has_value()); - ASSERT_TRUE(table.Commit("tenant-a", max).has_value()); - - auto before = Snapshot(table, "tenant-a"); - auto result = table.Reserve("tenant-a", 1); - - ASSERT_FALSE(result.has_value()); - EXPECT_EQ(result.error(), TenantQuotaError::kQuotaExceeded); - auto after = Snapshot(table, "tenant-a"); - EXPECT_EQ(after.used_bytes, before.used_bytes); - EXPECT_EQ(after.reserved_bytes, before.reserved_bytes); - EXPECT_EQ(after.committed_count, before.committed_count); -} - -TEST(TenantQuotaTableTest, ReserveMissingTenantDoesNotCreateStateOnFailure) { - TenantQuotaTable table; - - auto result = table.Reserve("missing", 1); - - ASSERT_FALSE(result.has_value()); - EXPECT_EQ(result.error(), TenantQuotaError::kQuotaExceeded); - EXPECT_FALSE(table.GetTenantSnapshot("missing").has_value()); - EXPECT_TRUE(table.ListTenantSnapshots().empty()); -} - -TEST(TenantQuotaTableTest, ReserveAbortReleasesReservation) { - TenantQuotaTable table; - ASSERT_TRUE(table.UpsertTenantPolicy("tenant-a", 100).has_value()); - table.RecomputeEffectiveQuotas(100); - - ASSERT_TRUE(table.Reserve("tenant-a", 40).has_value()); - ASSERT_TRUE(table.Abort("tenant-a", 40).has_value()); - - EXPECT_EQ(Snapshot(table, "tenant-a").reserved_bytes, 0); -} - -TEST(TenantQuotaTableTest, - CommitWithoutEnoughReservationDoesNotModifyStateAndReportsMismatch) { - TenantQuotaTable table; - ASSERT_TRUE(table.UpsertTenantPolicy("tenant-a", 100).has_value()); - table.RecomputeEffectiveQuotas(100); - ASSERT_TRUE(table.Reserve("tenant-a", 5).has_value()); - - auto before = Snapshot(table, "tenant-a"); - auto result = table.Commit("tenant-a", 10); - - ASSERT_FALSE(result.has_value()); - EXPECT_EQ(result.error(), TenantQuotaError::kAccountingMismatch); - auto after = Snapshot(table, "tenant-a"); - EXPECT_EQ(after.reserved_bytes, before.reserved_bytes); - EXPECT_EQ(after.used_bytes, before.used_bytes); - EXPECT_EQ(after.committed_count, before.committed_count); +TEST(TenantQuotaPolicyStoreTest, RejectsInvalidYamlPolicies) { + std::vector invalid_policies = { + "version: 2\n\ntenants: []\n", + "version: 1\n\ntenants:\n - name: tenant-a\n quota: 1XB\n", + "version: 1\n\ntenants:\n - name: tenant-a\n quota: 0\n", + "version: 1\n\ntenants:\n - name: \"\"\n quota: 1KB\n", + "version: 1\n\ntenants:\n - name: _system\n quota: 1KB\n", + "version: 1\n\ntenants:\n - name: \"tenant\\0bad\"\n quota: " + "1KB\n", + "version: 1\n\ntenants:\n - name: \"tenant\\nline\"\n quota: " + "1KB\n", + "version: 1\n\ntenants:\n - name: \"tenant\\x7f\"\n quota: 1KB\n", + "version: 1\n\ntenants:\n - name: tenant-a\n quota: 1KB\n - name: " + "tenant-a\n quota: 2KB\n", + "version: 1\n\ntenants:\n - name: tenant-a\n quota: " + "18446744073709551616\n", + "version: 1\n\ntenants:\n - name: tenant-a\n quota: " + "18446744073709551615TB\n", + }; + + for (const auto& policy : invalid_policies) { + auto snapshot = ParseTenantQuotaPolicyYaml(policy); + EXPECT_FALSE(snapshot.has_value()) << policy; + } } -TEST(TenantQuotaTableTest, - AbortWithoutEnoughReservationDoesNotModifyStateAndReportsMismatch) { - TenantQuotaTable table; - ASSERT_TRUE(table.UpsertTenantPolicy("tenant-a", 100).has_value()); - table.RecomputeEffectiveQuotas(100); - ASSERT_TRUE(table.Reserve("tenant-a", 5).has_value()); - - auto before = Snapshot(table, "tenant-a"); - auto result = table.Abort("tenant-a", 10); +TEST(TenantQuotaPolicyStoreTest, RoundTripsYamlFile) { + const auto path = MakeTempPolicyPath("roundtrip"); + std::filesystem::remove(path); - ASSERT_FALSE(result.has_value()); - EXPECT_EQ(result.error(), TenantQuotaError::kAccountingMismatch); - auto after = Snapshot(table, "tenant-a"); - EXPECT_EQ(after.reserved_bytes, before.reserved_bytes); - EXPECT_EQ(after.used_bytes, before.used_bytes); - EXPECT_EQ(after.committed_count, before.committed_count); -} + YamlTenantQuotaPolicyStore store(path.string()); + TenantQuotaPolicySnapshot snapshot; + snapshot.tenant_quotas = {{"tenant-a", 1024}, {"tenant-b", 2048}}; -TEST(TenantQuotaTableTest, - ReleaseWithoutEnoughUsedDoesNotModifyStateAndReportsMismatch) { - TenantQuotaTable table; - ASSERT_TRUE(table.UpsertTenantPolicy("tenant-a", 100).has_value()); - table.RecomputeEffectiveQuotas(100); - ASSERT_TRUE(table.Reserve("tenant-a", 5).has_value()); - ASSERT_TRUE(table.Commit("tenant-a", 5).has_value()); + auto save = store.Save(snapshot); + ASSERT_TRUE(save.has_value()) << save.error(); - auto before = Snapshot(table, "tenant-a"); - auto result = table.Release("tenant-a", 10); + auto loaded = store.Load(); + ASSERT_TRUE(loaded.has_value()) << loaded.error(); + EXPECT_EQ(loaded->tenant_quotas, snapshot.tenant_quotas); - ASSERT_FALSE(result.has_value()); - EXPECT_EQ(result.error(), TenantQuotaError::kAccountingMismatch); - auto after = Snapshot(table, "tenant-a"); - EXPECT_EQ(after.reserved_bytes, before.reserved_bytes); - EXPECT_EQ(after.used_bytes, before.used_bytes); - EXPECT_EQ(after.committed_count, before.committed_count); + std::filesystem::remove(path); } -TEST(TenantQuotaTableTest, ReleasePartialDoesNotChangeCommittedCount) { - TenantQuotaTable table; - ASSERT_TRUE(table.UpsertTenantPolicy("tenant-a", 100).has_value()); - table.RecomputeEffectiveQuotas(100); - ASSERT_TRUE(table.Reserve("tenant-a", 50).has_value()); - ASSERT_TRUE(table.Commit("tenant-a", 50).has_value()); +TEST(TenantQuotaPolicyStoreTest, RoundTripsYamlSpecialScalarNames) { + TenantQuotaPolicySnapshot snapshot; + snapshot.tenant_quotas = {{"foo#bar", 1}, + {"true", 2}, + {"[a, b]", 3}, + {"key: val", 4}, + {"quote\"slash\\", 5}}; - ASSERT_TRUE(table.ReleasePartial("tenant-a", 20).has_value()); + auto parsed = + ParseTenantQuotaPolicyYaml(FormatTenantQuotaPolicyYaml(snapshot)); - auto snapshot = Snapshot(table, "tenant-a"); - EXPECT_EQ(snapshot.used_bytes, 30); - EXPECT_EQ(snapshot.committed_count, 1); + ASSERT_TRUE(parsed.has_value()) << parsed.error(); + EXPECT_EQ(parsed->tenant_quotas, snapshot.tenant_quotas); } -TEST(TenantQuotaTableTest, - ReleasePartialUnderflowReportsMismatchInReleaseBuild) { - TenantQuotaTable table; - ASSERT_TRUE(table.UpsertTenantPolicy("tenant-a", 100).has_value()); - table.RecomputeEffectiveQuotas(100); +TEST(TenantQuotaPolicyStoreTest, SaveFailureReturnsError) { + const auto path = MakeTempPolicyPath("missing-dir").parent_path() / + ("missing_dir_" + std::to_string(::getpid())) / + "policy.yaml"; + YamlTenantQuotaPolicyStore store(path.string()); -#ifdef NDEBUG - auto before = Snapshot(table, "tenant-a"); - auto result = table.ReleasePartial("tenant-a", 10); + TenantQuotaPolicySnapshot snapshot; + snapshot.tenant_quotas = {{"tenant-a", 1024}}; - ASSERT_FALSE(result.has_value()); - EXPECT_EQ(result.error(), TenantQuotaError::kAccountingMismatch); - auto after = Snapshot(table, "tenant-a"); - EXPECT_EQ(after.reserved_bytes, before.reserved_bytes); - EXPECT_EQ(after.used_bytes, before.used_bytes); - EXPECT_EQ(after.committed_count, before.committed_count); -#else - EXPECT_DEATH({ (void)table.ReleasePartial("tenant-a", 10); }, ""); -#endif -} - -TEST(TenantQuotaTableTest, ZeroByteAccountingOperationsAreNoOpSuccess) { - TenantQuotaTable table; - ASSERT_TRUE(table.UpsertTenantPolicy("tenant-a", 100).has_value()); - table.RecomputeEffectiveQuotas(100); - - EXPECT_TRUE(table.Reserve("tenant-a", 0).has_value()); - EXPECT_TRUE(table.Commit("tenant-a", 0).has_value()); - EXPECT_TRUE(table.Abort("tenant-a", 0).has_value()); - EXPECT_TRUE(table.Release("tenant-a", 0).has_value()); - EXPECT_TRUE(table.ReleasePartial("tenant-a", 0).has_value()); - - auto snapshot = Snapshot(table, "tenant-a"); - EXPECT_EQ(snapshot.used_bytes, 0); - EXPECT_EQ(snapshot.reserved_bytes, 0); - EXPECT_EQ(snapshot.committed_count, 0); + auto save = store.Save(snapshot); + EXPECT_FALSE(save.has_value()); } } // namespace