diff --git a/_includes/code/howto/namespaces.py b/_includes/code/howto/namespaces.py new file mode 100644 index 00000000..b0d879f6 --- /dev/null +++ b/_includes/code/howto/namespaces.py @@ -0,0 +1,199 @@ +# How-to: Manage namespaces — Python examples. +# +# Requires Weaviate v1.38.0+ with NAMESPACES_ENABLED=true, and the Python +# client release that adds namespace support (PR +# weaviate/weaviate-python-client#2033). Examples connect as a global +# (operator) principal; the `client.namespaces.*` endpoints are +# operator-only. + +import os +import time + +import weaviate +from weaviate.auth import Auth +from weaviate.rbac.models import Permissions + + +# ========================================== +# ===== Connect as the operator ===== +# ========================================== + +# START Connect +client = weaviate.connect_to_local( + auth_credentials=Auth.api_key(os.environ["OPERATOR_API_KEY"]), +) +# END Connect + +# Clean up any leftovers from a previous run so the script is idempotent. +for cleanup_ns in ("customer1",): + existing = client.namespaces.get(name=cleanup_ns) + if existing is not None and existing.state != "deleting": + client.namespaces.delete(name=cleanup_ns) + # Wait for the two-phase cascade cleanup to finish. + deadline = time.time() + 60 + while client.namespaces.get(name=cleanup_ns) is not None and time.time() < deadline: + time.sleep(0.5) +for cleanup_role in ("namespaceUser", "namespace_admin", "all_namespace_admin"): + try: + client.roles.delete(role_name=cleanup_role) + except Exception: + pass + + +# ========================================== +# ===== Create a namespace ===== +# ========================================== + +# START CreateNamespace +# Omit home_node to let the cluster pick automatically. To pin the +# namespace's shards to a specific node, pass home_node="<node-name>" +# where <node-name> is a current storage candidate from +# client.cluster.nodes().
+ns = client.namespaces.create(name="customer1") +print(ns.name, ns.home_node, ns.state) +# → customer1 <node-name> active +# END CreateNamespace + + +# ========================================== +# ===== Get a single namespace ===== +# ========================================== + +# START GetNamespace +ns = client.namespaces.get(name="customer1") +if ns is None: + print("not found") +else: + print(ns.name, ns.home_node, ns.state) +# END GetNamespace + + +# ========================================== +# ===== List all namespaces ===== +# ========================================== + +# START ListNamespaces +for ns in client.namespaces.list_all(): + print(ns.name, ns.state) +# END ListNamespaces + + +# ========================================== +# ===== Update the home node ===== +# ========================================== + +# START UpdateNamespace +# Updating home_node only affects future shard placements — existing +# shards are not moved. Pick a storage candidate from +# client.cluster.nodes(). +target_node = client.cluster.nodes()[0].name +client.namespaces.update(name="customer1", home_node=target_node) +# END UpdateNamespace + + +# ========================================== +# ===== Create a role for namespaced principals ===== +# ========================================== + +# START NamespaceUserRole +# Define the role once at the global level — the RBAC matcher fans the +# collection pattern out per caller's namespace at request time. Do NOT +# hard-code `customer1:` into role definitions.
+client.roles.create( + role_name="namespaceUser", + permissions=( + Permissions.collections( + collection="*", + create_collection=True, + read_config=True, + update_config=True, + delete_collection=True, + ) + + Permissions.data( + collection="*", create=True, read=True, update=True, delete=True + ) + ), +) +# END NamespaceUserRole + + +# ========================================== +# ===== Create a namespaced DB user ===== +# ========================================== + +# START CreateNamespacedUser +# Bind the new DB user to a namespace via the `namespace=` argument. The user +# is stored internally as `customer1:api_user` and can only see resources in +# `customer1`. +api_key = client.users.db.create(user_id="api_user", namespace="customer1") +print(api_key) +# END CreateNamespacedUser + + +# ========================================== +# ===== Assign the role to the namespaced user ===== +# ========================================== + +# START AssignNamespaceUserRole +# Address the namespaced user by its fully-qualified internal name. +client.users.db.assign_roles( + user_id="customer1:api_user", role_names=["namespaceUser"] +) +# END AssignNamespaceUserRole + + +# ========================================== +# ===== What the namespaced user sees ===== +# ========================================== + +# START NamespacedUserView +# Connect as the namespaced user (using the api_key from when the user +# was created) and create a collection with a SHORT name — no +# `customer1:` prefix. 
+ns_client = weaviate.connect_to_local( + auth_credentials=Auth.api_key(api_key), +) +ns_client.collections.create(name="Movies") + +# Listing the schema strips the namespace prefix: +for name in ns_client.collections.list_all(): + print(name) +# → Movies + +ns_client.close() +# END NamespacedUserView + + +# ========================================== +# ===== Define a role with namespace-management permissions ===== +# ========================================== + +# START NamespacePermissions +# `manage_namespaces` is the operator-only RBAC permission for +# /v1/namespaces CRUD. Scope it to a specific namespace or to all (`*`). +client.roles.create( + role_name="namespace_admin", + permissions=Permissions.namespaces(namespace="customer1", manage=True), +) + +# Wildcard — manage any namespace +client.roles.create( + role_name="all_namespace_admin", + permissions=Permissions.namespaces(namespace="*", manage=True), +) +# END NamespacePermissions + + +# ========================================== +# ===== Delete a namespace ===== +# ========================================== + +# START DeleteNamespace +# Two-phase: the namespace flips to `state: deleting`, then a background +# cascade removes its DB users, aliases, and collections. Idempotent — +# repeated calls during cleanup return without error. +client.namespaces.delete(name="customer1") +# END DeleteNamespace + + +client.close() diff --git a/_includes/feature-notes/namespaces.mdx b/_includes/feature-notes/namespaces.mdx new file mode 100644 index 00000000..fcc534ea --- /dev/null +++ b/_includes/feature-notes/namespaces.mdx @@ -0,0 +1,5 @@ +:::caution Preview — added in `v1.38` + +This is a preview feature. The API may change in future releases. 
+ +::: diff --git a/_includes/namespaces-overview.mdx b/_includes/namespaces-overview.mdx new file mode 100644 index 00000000..5f014e8d --- /dev/null +++ b/_includes/namespaces-overview.mdx @@ -0,0 +1,20 @@ +A **namespace** is a cluster-level isolation boundary: every collection and alias belongs to **exactly one** namespace, so one physical Weaviate cluster can host many isolated logical "customer clusters" without anything leaking between them. + +For example, a user in namespace `customer1` creates a collection called `Movies`. The cluster stores it under the qualified name `customer1:Movies`, but that user only ever sees the short name `Movies`. A user in another namespace can create their own `Movies` collection without collision. + +Each namespace is an entity with three attributes: + +- A **name** (`customer1`) — immutable, lowercase ASCII plus digits and hyphens, 3–36 characters. +- A **home node** that every collection in the namespace pins all its shards to (each namespace runs at replication factor 1). +- A **state**, either `active` or `deleting`. + +On a namespace-enabled cluster the `:` character is **reserved** — collection, alias, role, and user IDs cannot contain it, so the split between a namespace and a short name is always unambiguous. + +Terminology used throughout these pages: + +- **Principal** — an authenticated identity making a request. On a namespace-enabled cluster every principal is either *namespaced* or *global*. + - **Namespaced principal** — a caller bound to a single namespace (a dynamic DB user, or an OIDC user whose token carries a namespace claim). Sees and submits short names like `Movies`. + - **Global principal** — a cluster-wide caller (a static API key, or an OIDC user with the global claim). Sees and must submit fully-qualified names like `customer1:Movies`. +- **Operator** — a global principal with administrative permissions (such as `manage_namespaces`) who creates and manages namespaces and their users. 
+- **Name resolution** — how Weaviate maps between the short names a namespaced principal uses (`Movies`) and the qualified names stored on the cluster (`customer1:Movies`), stripping the prefix from responses automatically. +- **Home node** — the single node a namespace's shards are pinned to. diff --git a/docs/deploy/configuration/env-vars/index.md b/docs/deploy/configuration/env-vars/index.md index 3db54c30..533c9b5f 100644 --- a/docs/deploy/configuration/env-vars/index.md +++ b/docs/deploy/configuration/env-vars/index.md @@ -36,6 +36,7 @@ import APITable from '@site/src/components/APITable'; | `DEFAULT_SHARDING_COUNT` | Default `desiredCount` for new single-tenant collections, used when the collection definition does not specify one. An explicit `desiredCount` in the class creation request still takes precedence. A value of `0` (default) uses the cluster node count. Multi-tenant collections are unaffected. Must be `<= 512`. Runtime-configurable. Default: `0`
Added in `v1.37` | `string - number` | `12` | | `DEFAULT_VECTORIZER_MODULE` | Default vectorizer module - can be overridden by the vectorizer in the collection definition. | `string` | `text2vec-contextionary` | | `API_BASED_MODULES_DISABLED` | Weaviate automatically enables the usage of all [API based modules](../../../weaviate/model-providers/index.md#api-based). Set this variable to `true` in order to limit access and only allow specific modules through the [`ENABLE_MODULES`](#ENABLE_MODULES) variable. Default: `false`
Added in `v1.33` | `boolean` | `true` | +| `DISABLE_GRAPHQL` | If `true`, disable the GraphQL API cluster-wide; use the REST and gRPC APIs instead. Required when [namespaces](/weaviate/concepts/namespaces.md) are enabled. Default: `false` | `boolean` | `true` | | `DISABLE_LAZY_LOAD_SHARDS` | When `false`, enable lazy shard loading to improve mean time to recovery in multi-tenant deployments. **Deprecated in `v1.36.6`.** Use `LAZY_LOAD_SHARD_COUNT_THRESHOLD` and `LAZY_LOAD_SHARD_SIZE_THRESHOLD_GB` instead. Weaviate now auto-detects when lazy loading is needed per collection. | `string` | `false` | | `DISABLE_TELEMETRY` | Disable [telemetry](/deploy/configuration/telemetry.md) data collection | boolean | `false` | | `DISK_USE_READONLY_PERCENTAGE` | If disk usage is higher than the given percentage all shards on the affected node will be marked as `READONLY`, meaning all future write requests will fail. See [Disk Pressure Warnings and Limits for details](/deploy/configuration/persistence.md#disk-pressure-warnings-and-limits). | `string - number` | `90` | @@ -72,6 +73,7 @@ import APITable from '@site/src/components/APITable'; | `MEMORY_READONLY_PERCENTAGE` | If memory usage is higher than the given percentage all shards on the affected node will be marked as `READONLY`, meaning all future write requests will fail. (Default: `0` - i.e. no limit) | `string - number` | `75` | | `MEMORY_WARNING_PERCENTAGE` | If memory usage is higher than the given percentage a warning will be logged by all shards on the affected node's disk. (Default: `0` - i.e. no limit) | `string - number` | `85` | | `MODULES_CLIENT_TIMEOUT` | Timeout for requests to Weaviate modules. Default: `50s` | `string - duration` | `5s`, `10m`, `1h` | +| `NAMESPACES_ENABLED` | If `true`, enable cluster-level [namespace](/weaviate/concepts/namespaces.md) isolation (`v1.38` Preview). Requires `DISABLE_GRAPHQL=true` and `REPLICATION_MAXIMUM_FACTOR=1`. 
Supported only on new clusters — cannot be enabled on a cluster with pre-existing non-namespaced collections, and a cluster that has run with it `true` cannot be downgraded. Default: `false`
Added in `v1.38` | `boolean` | `true` | | `OBJECTS_TTL_BATCH_SIZE` | Number of objects deleted per batch during TTL cleanup. With the default pause settings, a pause occurs every `OBJECTS_TTL_BATCH_SIZE * OBJECTS_TTL_PAUSE_EVERY_NO_BATCHES` objects (100,000 by default). Can be modified at runtime. Default: `10000`
Added in `v1.36` | `string - number` | `10000` | | `OBJECTS_TTL_CONCURRENCY_FACTOR` | Controls the concurrency of the TTL deletion process as a multiplier. Higher values use more resources but delete faster. Must be greater than 0. Can be modified at runtime. Default: `1`
Added in `v1.36` | `string - number` | `1` | | `OBJECTS_TTL_DELETE_SCHEDULE` | Schedule for deleting expired objects. Accepts standard 5-field cron format, 6-field (with seconds), 7-field (with seconds and year), descriptors (`@yearly`, `@monthly`, `@weekly`, `@daily`, `@hourly`), or hash expressions. Default: `""` (disabled)
Added in `v1.36` | `string - cron format` | `0 */6 * * *` (every 6 hours) | @@ -184,6 +186,8 @@ For more information on authentication and authorization, see the [Authenticatio | `AUTHENTICATION_OIDC_SCOPES` | OIDC scopes to request | `string - comma-separated list` | `openid,email` | | `AUTHENTICATION_OIDC_SKIP_CLIENT_ID_CHECK` | Skip OIDC Client ID check | `boolean` | `false` | | `AUTHENTICATION_OIDC_USERNAME_CLAIM` | OIDC Username Claim | `string` | `email` | +| `OIDC_NAMESPACE_CLAIM` | On namespace-enabled clusters (`NAMESPACES_ENABLED=true`), name of the OIDC token claim that carries the principal's namespace. See [Namespaces — OIDC classification](/weaviate/concepts/namespaces.md#oidc-classification).
Added in `v1.38` | `string` | `namespace` | +| `OIDC_GLOBAL_PRINCIPAL_CLAIM` | On namespace-enabled clusters, name of the OIDC token claim that marks the principal as global (boolean). Mutually exclusive with `OIDC_NAMESPACE_CLAIM` on a per-token basis — see [Namespaces — OIDC classification](/weaviate/concepts/namespaces.md#oidc-classification).
Added in `v1.38` | `string` | `is_global` | | `AUTHORIZATION_ADMINLIST_ENABLED` | Enable AdminList authorization scheme (mutually exclusive with `AUTHORIZATION_RBAC_ENABLED`) | `boolean` | `true` | | `AUTHORIZATION_ADMINLIST_USERS` | Users with admin permission when AdminList scheme used | `string - comma-separated list` | `jane@example.com,john@example.com` | | `AUTHORIZATION_ADMINLIST_READONLY_USERS` | Users with read-only permission when AdminList scheme used | `string - comma-separated list` | `alice@example.com,dave@example.com` | @@ -236,6 +240,7 @@ For more information on authentication and authorization, see the [Authenticatio | `REPLICA_MOVEMENT_MINIMUM_ASYNC_WAIT` | How long replica movement waits after file copy but before finalizing the move in order for in progress writes to finish. Default: `60` seconds
Added in `v1.32` | `string - number` | `90` | | `REPLICATED_INDICES_REQUEST_QUEUE_ENABLED` | Enable/disable the request queue buffer for replicated indices in multi-node clusters. Can be modified at runtime. Default: `false` | `boolean` | `true` | | `REPLICATION_ENGINE_MAX_WORKERS` | The number of workers to process replica movements in parallel. Default: `10`
Added in `v1.32` | `string - number` | `5` | +| `REPLICATION_MAXIMUM_FACTOR` | The maximum replication factor for all collections in the cluster. | `string - number` | `1` | | `REPLICATION_MINIMUM_FACTOR` | The minimum replication factor for all collections in the cluster. | `string - number` | `3` | ```mdx-code-block diff --git a/docs/deploy/configuration/oidc.md b/docs/deploy/configuration/oidc.md index 81139445..e13fbf5a 100644 --- a/docs/deploy/configuration/oidc.md +++ b/docs/deploy/configuration/oidc.md @@ -189,6 +189,24 @@ Configuring the OIDC token issuer is outside the scope of Weaviate's configurati By default, Weaviate validates that the token includes a specified client id in the audience claim. If your token issuer does not support this feature, you can turn it off as outlined in the [authentication configuration](./authentication.md#oidc-docker). ::: +## OIDC on namespace-enabled clusters + +On clusters with `NAMESPACES_ENABLED=true` ([namespaces](/weaviate/concepts/namespaces.md)), every OIDC token is classified as either a **namespaced** or **global** principal. Two env vars select which claims drive the classification: + +| Env var | Purpose | +|---|---| +| `OIDC_NAMESPACE_CLAIM` | Name of the claim that holds the namespace string (e.g. `namespace`). | +| `OIDC_GLOBAL_PRINCIPAL_CLAIM` | Name of the claim that holds the global-principal boolean (e.g. `is_global`). | + +A token must select exactly one classification. The server **rejects** tokens that: + +- Carry both a namespace claim and the global-principal claim set to `true`. +- Carry neither claim on a namespace-enabled cluster. +- Name a namespace that doesn't exist (Weaviate never auto-creates). +- Combine a namespace claim with a `root` group from `AUTHORIZATION_RBAC_ROOT_GROUPS` / `_USERS` — `root` is cluster-global and cannot coexist with a namespace. + +On clusters where `NAMESPACES_ENABLED=false`, presence of either claim in the token causes the request to be rejected. 
See [Namespaces — OIDC classification](/weaviate/concepts/namespaces.md#oidc-classification) for the full rules. + ## Questions and feedback import DocsFeedback from '/_includes/docs-feedback.mdx'; diff --git a/docs/weaviate/concepts/data.md b/docs/weaviate/concepts/data.md index c680abe9..3147a91e 100644 --- a/docs/weaviate/concepts/data.md +++ b/docs/weaviate/concepts/data.md @@ -324,6 +324,12 @@ For details on configuring your schema, see the [schema tutorial](../starter-gui To separate data within a cluster, use multi-tenancy. Weaviate partitions the cluster into shards. Each shard holds data for a single tenant. +:::info Multi-tenancy is different from namespaces + +[Multi-tenancy](../manage-collections/multi-tenancy.mdx) isolates **data within one collection**. [Namespaces](./namespaces.md) isolate **collections across the cluster**. They're orthogonal — a multi-tenant collection inside a namespace works as expected. + +::: + ```mermaid %%{init: {'theme': 'base', 'themeVariables': { 'background': '#f5f5f5' }}}%% flowchart TB diff --git a/docs/weaviate/concepts/index.md b/docs/weaviate/concepts/index.md index 7747d161..e26e2b75 100644 --- a/docs/weaviate/concepts/index.md +++ b/docs/weaviate/concepts/index.md @@ -42,6 +42,10 @@ import AcademyAdmonition from '@site/src/components/AcademyAdmonition'; - Read more about Weaviate's vector quantization options. +**[Namespaces](./namespaces.md)** + +- Cluster-level isolation where every collection and alias belongs to exactly one namespace, letting one cluster host many isolated logical "customer clusters". + ## Weaviate Architecture The figure below gives a 30,000 feet view of Weaviate's architecture. 
diff --git a/docs/weaviate/concepts/namespaces.md b/docs/weaviate/concepts/namespaces.md new file mode 100644 index 00000000..5562f54f --- /dev/null +++ b/docs/weaviate/concepts/namespaces.md @@ -0,0 +1,139 @@ +--- +title: Namespaces +sidebar_position: 8 +image: og/docs/concepts.jpg +description: Namespaces in Weaviate provide cluster-level isolation — every collection and alias belongs to exactly one namespace. One physical cluster can host many isolated logical "customer clusters" with separate users, schemas, and quotas. +# tags: ['namespaces', 'multi-tenancy', 'isolation'] +--- + +import NamespacesPreview from '/_includes/feature-notes/namespaces.mdx'; +import NamespacesOverview from '/_includes/namespaces-overview.mdx'; + + + + + +## Namespaces vs multi-tenancy + +These are **two different features** that sound similar. They are orthogonal and you can use both at once. + +| | Namespaces | Multi-tenancy | +|---|---|---| +| **Scope** | The whole cluster | A single collection | +| **What it isolates** | Collections, aliases, principals, quotas | Object data within one collection | +| **Naming** | Short names map to `namespace:CollectionName` storage | All tenants share the same collection name | +| **Use case** | "One Weaviate cluster, many customer environments" | "One collection, many end-users with isolated rows" | +| **Enabled by** | `NAMESPACES_ENABLED=true` cluster-wide | `MultiTenancyConfig` on the collection | + +A multi-tenant collection inside a namespace works exactly as before: the namespace scopes the *collection*, and multi-tenancy scopes the *data within the collection*. + +## Cluster prerequisites + +Namespace mode is opt-in and only supported on new clusters. Three server-level invariants are checked at startup: + +| Setting | Required value | Why | +|---|---|---| +| [`NAMESPACES_ENABLED`](/deploy/configuration/env-vars/index.md#NAMESPACES_ENABLED) | `true` | Master switch. Off by default. 
| +| [`DISABLE_GRAPHQL`](/deploy/configuration/env-vars/index.md#DISABLE_GRAPHQL) | `true` | GraphQL introspection cannot be safely scoped per namespace. The two flags are validated together at boot. | +| [`REPLICATION_MAXIMUM_FACTOR`](/deploy/configuration/env-vars/index.md#REPLICATION_MAXIMUM_FACTOR) | `1` | Every collection in a namespace runs at RF=1 on its single home node. Multi-replica namespaces are out of scope for this preview. | + +The server refuses to start if: + +- `NAMESPACES_ENABLED=true` is set on a cluster that already has non-namespaced collections. +- `NAMESPACES_ENABLED=false` is set on a cluster that already has namespace entities or namespace-qualified collections. + +:::warning Downgrade is not supported + +A cluster that has ever run with `NAMESPACES_ENABLED=true` cannot be downgraded to a pre-namespace binary. Plan accordingly. + +::: + +### Naming rules + +- 3–36 characters +- `[a-z0-9][a-z0-9-]*[a-z0-9]` — lowercase ASCII, digits, and hyphens, and cannot start or end with a hyphen +- Cannot contain `:` (the namespace separator, reserved cluster-wide) +- Not in the reserved list: `admin`, `system`, `default`, `internal`, `weaviate`, `global`, `public` + +The name is **immutable** after create. To rename, delete the namespace (which cascades to all its collections, aliases, and users) and create a new one. + +## Principals: namespaced vs global + +Every authenticated request on a namespace-enabled cluster resolves to **exactly one** of two principal kinds: + +| Source | Classification | +|---|---| +| **Dynamic DB user** — created via `POST /v1/users/db/{user_id}` | Always **namespaced**. The target namespace is set at create time and cannot change. | +| **Static API key** — configured via `AUTHENTICATION_APIKEY_USERS` + `_ALLOWED_KEYS` | Always **global**. Operator/bootstrap only — not exposed to namespaced principals in managed deployments. | +| **OIDC user** | Classified by token claims — see below. 
| + +### OIDC classification + +Two server env vars name the claims used to classify OIDC tokens: + +- [`OIDC_NAMESPACE_CLAIM`](/deploy/configuration/env-vars/index.md#OIDC_NAMESPACE_CLAIM) — the claim holding the namespace string. +- [`OIDC_GLOBAL_PRINCIPAL_CLAIM`](/deploy/configuration/env-vars/index.md#OIDC_GLOBAL_PRINCIPAL_CLAIM) — the claim holding the global-principal boolean. + +A token must resolve to exactly one classification. A non-empty namespace claim with no global claim (or `false`) is accepted as **namespaced**. The global claim set to `true` with no namespace claim is accepted as **global**. Weaviate **rejects** the token when both claims are set (ambiguous), when neither is set, when the namespace claim names a namespace that doesn't exist (Weaviate never auto-creates them), or when a namespace claim is combined with `root` (via `AUTHORIZATION_RBAC_ROOT_GROUPS` / `_USERS`), since `root` is cluster-global and cannot coexist with a namespace. On clusters with `NAMESPACES_ENABLED=false`, presence of either claim causes the request to be rejected. + +### What each kind can do + +| | Namespaced | Global | +|---|---|---| +| Create / list / delete collections | ✓ — via short names | ✗ — global principals cannot create collections on namespace-enabled clusters | +| Read / update / delete existing collections | ✓ — via short names in their own namespace | ✓ — via fully-qualified `ns:Name` only | +| `GET /v1/namespaces` | RBAC-filtered (typically empty) | ✓ | +| Create / delete namespaces | ✗ | ✓ (operator-only RBAC) | +| `/v1/backups`, `/v1/replication`, `/v1/nodes` | Blocked via RBAC | ✓ | +| GraphQL | Disabled cluster-wide | Disabled cluster-wide | + +### Built-in roles on namespace-enabled clusters + +The four built-in roles split into two classes: + +- **`root`, `read-only`** — env-var-only operator roles. Not assignable through the role-assignment API. Reserved for explicit global principals. 
+- **`admin`, `viewer`** — assignable built-ins safe to grant to namespaced principals, narrowed to allowlists over objects/data, collections/schema, multi-tenancy. `viewer` = read/list, `admin` = CRUD within those families. + +## Name resolution + +Take a collection stored as `customer1:Movies`. A **namespaced principal** sees the short name `Movies`. It submits `Movies` (which Weaviate auto-qualifies to `customer1:Movies`), and submitting the qualified `customer1:Movies` directly is rejected. A **global principal** sees the qualified `customer1:Movies` with no stripping, and must submit `customer1:Movies` — a short name such as `Movies` is treated as not found. + +For a namespaced caller, Weaviate strips the namespace prefix from responses **at the source** — the point where the response is built. +Stripping uses the caller's **own** namespace. A namespaced caller can never observe another namespace's prefix. The worst-case "leak" is therefore the caller's own prefix appearing in a response field where stripping was missed. + +### References across namespaces don't work + +Beacons store the **short** target name in payload — `weaviate://localhost/Movies/uuid`, not `customer1:Movies`. At read time the namespace is resolved from the **source collection**, not from the calling principal. Cross-namespace references therefore don't work: a reference from `customer1:Books → Movies` always resolves to `customer1:Movies`, even when a global principal navigates it. + +## Limits + +| Limit | Value | Notes | +|---|---|---| +| Replication factor | `1` per namespace | All shards land on the namespace's `home_node`. Updates to `home_node` apply only to *new* shards — existing shards are not moved. | +| Object count per namespace | Soft business control | Evaluated on the home node before write. Updates and deletes are always allowed regardless of quota. Quota is async — small overshoots are expected.
| +| Collection count per namespace | `MAXIMUM_ALLOWED_COLLECTIONS_COUNT`, applied **per namespace** on namespace-enabled clusters | Checked at schema-create time. | +| Vector dimensions per namespace | Reserved in design | Not yet enforced in this preview. | + +## Cross-feature interactions + +| Feature | Behavior on namespace-enabled clusters | +|---|---| +| **Auto-schema** | A namespaced principal who triggers auto-schema creates the resulting collection in their own namespace. | +| **Filter parser** | Collection names in filter paths are resolved through the namespacing resolver — namespaced callers use short names. | +| **MCP server** | Both `weaviate-get-collection-config` and `weaviate-query-hybrid` resolve short names via the namespacing resolver. Namespaced principals can use MCP tools transparently. | +| **Multi-tenancy** | Orthogonal. A multi-tenant collection inside a namespace works as expected, and per-tenant data isolation is preserved within the namespace's collection. | +| **Aliases** | Scoped to a namespace. An alias in `customer1` resolves to a collection in `customer1`. | +| **Audit logging** | Every operation includes the namespace in audit entries — emitted as separate `namespace=` and `collection=` fields (not concatenated), so downstream tooling can filter on namespace without string parsing. | + +## Related pages + +- [Manage namespaces](../configuration/namespaces.mdx) — operator how-to: enable, create, delete, bootstrap a DB user. +- [Multi-tenancy](../manage-collections/multi-tenancy.mdx) — per-collection tenant isolation (different feature). +- [RBAC](../configuration/rbac/index.mdx) — how the namespaced/global principal split affects roles. +- [Authentication](/deploy/configuration/authentication.md) and [OIDC](/deploy/configuration/oidc.md) — claim configuration for namespace classification.
+ +## Questions and feedback + +import DocsFeedback from '/\_includes/docs-feedback.mdx'; + + diff --git a/docs/weaviate/configuration/index.mdx b/docs/weaviate/configuration/index.mdx index 40bc472d..e27dd136 100644 --- a/docs/weaviate/configuration/index.mdx +++ b/docs/weaviate/configuration/index.mdx @@ -39,6 +39,13 @@ export const configOpsData = [ link: "/weaviate/configuration/modules", icon: "fas fa-puzzle-piece", }, + { + title: "Namespaces", + description: + "Cluster-level isolation where every collection and alias belongs to exactly one namespace, so one cluster can host many isolated customer environments.", + link: "/weaviate/configuration/namespaces", + icon: "fas fa-layer-group", + }, { title: "MCP server", description: diff --git a/docs/weaviate/configuration/namespaces.mdx b/docs/weaviate/configuration/namespaces.mdx new file mode 100644 index 00000000..54e39fdd --- /dev/null +++ b/docs/weaviate/configuration/namespaces.mdx @@ -0,0 +1,239 @@ +--- +title: Manage namespaces +sidebar_label: Namespaces +image: og/docs/configuration.jpg +description: Enable namespaces on a Weaviate cluster, create and manage namespace entities via the REST API, and bootstrap a namespaced DB user. +# tags: ['namespaces', 'operations', 'multi-tenancy', 'rbac'] +--- + +import Tabs from "@theme/Tabs"; +import TabItem from "@theme/TabItem"; +import FilteredTextBlock from "@site/src/components/Documentation/FilteredTextBlock"; +import PyCode from "!!raw-loader!/_includes/code/howto/namespaces.py"; + +import NamespacesPreview from '/_includes/feature-notes/namespaces.mdx'; +import NamespacesOverview from '/_includes/namespaces-overview.mdx'; + + + +Operator how-to for the **namespaces** feature — a cluster-level isolation boundary. For the conceptual model and how it differs from per-collection [multi-tenancy](../manage-collections/multi-tenancy.mdx), see [Namespaces](../concepts/namespaces.md). 
This page covers what an operator does: enable namespaces on a cluster, manage namespace entities, and bootstrap a namespaced DB user. + +
+ + New to namespaces? Concepts and terminology + + + + +
+ +## Enable namespaces on a new cluster + +Namespaces are only supported on **new clusters**. Set the following env vars before first start: + +- [`NAMESPACES_ENABLED`](/deploy/configuration/env-vars/index.md#NAMESPACES_ENABLED) — set to `true`. The master switch. +- [`DISABLE_GRAPHQL`](/deploy/configuration/env-vars/index.md#DISABLE_GRAPHQL) — set to `true`. GraphQL cannot be safely scoped per namespace. +- [`REPLICATION_MAXIMUM_FACTOR`](/deploy/configuration/env-vars/index.md#REPLICATION_MAXIMUM_FACTOR) — set to `1`. Every collection in a namespace runs at RF=1 on its home node. + +The server validates these at boot. It refuses to start if `NAMESPACES_ENABLED=true` is set on a cluster that already has non-namespaced collections, or if `NAMESPACES_ENABLED=false` is set on a cluster that already has namespace entities. + +For OIDC clusters, also set the claim names used to classify tokens (see [Namespaces concept page](../concepts/namespaces.md#oidc-classification) for the rules): + +- [`OIDC_NAMESPACE_CLAIM`](/deploy/configuration/env-vars/index.md#OIDC_NAMESPACE_CLAIM) — name of the claim holding the namespace string (e.g. `namespace`). +- [`OIDC_GLOBAL_PRINCIPAL_CLAIM`](/deploy/configuration/env-vars/index.md#OIDC_GLOBAL_PRINCIPAL_CLAIM) — name of the claim holding the global-principal boolean (e.g. `is_global`). + +:::warning Downgrade is not supported + +A cluster that has ever run with `NAMESPACES_ENABLED=true` cannot be downgraded to a pre-namespace binary. There is no migration tool to enable namespaces on an existing cluster or move a namespace between clusters in this preview. + +::: + +## Namespace REST endpoints + +All `/v1/namespaces` endpoints are **operator-only** and require the `manage_namespaces` permission. The whole surface returns `404 Not Found` when `NAMESPACES_ENABLED=false`. + +| Verb | Path | Purpose | +|---|---|---| +| `GET` | `/v1/namespaces` | List namespaces (RBAC-filtered). 
| +| `GET` | `/v1/namespaces/{namespace_id}` | Get a namespace. | +| `POST` | `/v1/namespaces/{namespace_id}` | Create a namespace. | +| `PUT` | `/v1/namespaces/{namespace_id}` | Update `home_node` only. `name` and `state` are immutable through this endpoint. | +| `DELETE` | `/v1/namespaces/{namespace_id}` | Two-phase deletion (flip to `deleting`, then cascade cleanup). Idempotent. | + +The examples below connect as a global (operator) principal — `client.namespaces.*` calls require the `manage_namespaces` permission. + +## Create a namespace + + + + + + + +The `home_node` argument is optional — omit it to let Weaviate pick one automatically. The returned `Namespace` dataclass carries the assigned `home_node` and `state`. + +## Get and list namespaces + + + + + + + + + + + + + +`list_all()` is RBAC-filtered: a caller without `manage_namespaces` on any namespace receives an empty list, never a `403`. + +## Update the home node + +Updating `home_node` only affects future shard placements — existing shards are **not** moved. + + + + + + + +## Delete a namespace + + + + + + + +Deletion is two-phase: + +1. **Immediate** — namespace flips to `state: deleting`. Subsequent namespaced-principal requests targeting it are rejected. +2. **Background cascade** — Weaviate removes DB users in the namespace, then aliases, then collections. On-disk shard data may be renamed to a `.deleteme` tombstone first so cleanup can resume safely across restarts. Once empty, the namespace entity itself is removed. + +`DELETE` is **idempotent** — repeated calls during cleanup observe the same in-flight deletion and return without error. + +## Bootstrap a namespaced DB user + +The end-to-end flow for provisioning a namespaced principal: create the namespace, create a global role with the permissions you want namespaced principals to have, create a DB user inside the namespace, and assign the role. 
After Step 4, the user can create collections using short names without seeing the namespace prefix at all. + +### Step 1: Create the namespace + +Use the [Create a namespace](#create-a-namespace) flow above with `name="customer1"`. + +### Step 2: Create a role with the permissions namespaced principals should have + +Define the role **once** at the global level. The RBAC matcher fans the role's collection patterns out per caller's namespace at request time — do **not** hard-code `customer1:` into the role definition. + + + + + + + +### Step 3: Create a DB user in the namespace + +`client.users.db.create()` takes a separate `namespace=` keyword argument. The user is stored internally as `customer1:api_user` and can only see resources in `customer1`. + + + + + + + +The call returns the API key for the new user. + +### Step 4: Assign the role + +Address the namespaced user by its fully qualified internal name (`customer1:api_user`): + + + + + + + +### What the namespaced user sees + +Using the API key from Step 3, the namespaced user creates a collection with a **short name** — no `customer1:` prefix anywhere. Listing the schema also strips the prefix: + + + + + + + +The collection is stored internally as `customer1:Movies`. A global operator listing the same schema sees `customer1:Movies` unstripped. + +## Grant the `manage_namespaces` permission + +All `/v1/namespaces` endpoints require the operator-only `manage_namespaces` permission. To delegate namespace administration to another principal, create a role with this permission scoped to a specific namespace or to all namespaces (`*`): + + + + + + + +## Further resources + +- [Concepts: Namespaces](../concepts/namespaces.md) — what namespaces are, principal model, name resolution rules. +- [How-to: Multi-tenancy](../manage-collections/multi-tenancy.mdx) — per-collection tenant isolation (different feature). +- [Configuration: RBAC](./rbac/index.mdx) — how the namespaced/global principal split affects roles. 
+- [Configuration: OIDC configuration](/deploy/configuration/oidc.md) — claim configuration for OIDC token classification. + +## Questions and feedback + +import DocsFeedback from '/\_includes/docs-feedback.mdx'; + + diff --git a/docs/weaviate/configuration/rbac/index.mdx b/docs/weaviate/configuration/rbac/index.mdx index 3f6cd897..fba30314 100644 --- a/docs/weaviate/configuration/rbac/index.mdx +++ b/docs/weaviate/configuration/rbac/index.mdx @@ -88,6 +88,12 @@ style Permissions fill:#E0F7FA,stroke:#B2EBF2,stroke-width:1px This RBAC system ensures that users only have the access necessary for their roles, enhancing both security and manageability within Weaviate. Roles and permissions can be managed through the Weaviate REST API directly or through a **[client library](/weaviate/configuration/rbac/manage-roles)** programmatically. +:::info On namespace-enabled clusters + +When [namespaces](/weaviate/concepts/namespaces.md) are enabled, every authenticated principal is classified as either **namespaced** (a dynamic DB user, or an OIDC user whose token carries a namespace claim) or **global** (a static API key, or an OIDC user with the global claim). Role definitions stay namespace-agnostic — the Casbin matcher scopes them to the caller's own namespace at request time. The built-in roles are split accordingly: `root` and `read-only` (configurable only through environment variables) are reserved for global principals, while `admin` and `viewer` (assignable) are the roles that are safe to assign to namespaced principals. 
+ +::: + ## Roles ### Predefined roles diff --git a/docs/weaviate/manage-collections/collection-aliases.mdx b/docs/weaviate/manage-collections/collection-aliases.mdx index f9519ba7..a7a5bfcf 100644 --- a/docs/weaviate/manage-collections/collection-aliases.mdx +++ b/docs/weaviate/manage-collections/collection-aliases.mdx @@ -19,6 +19,12 @@ import CollectionAliases from '/_includes/feature-notes/collection-aliases.mdx'; +:::note Namespaces + +On namespace-enabled clusters, aliases are scoped to a namespace — an alias resolves to a collection in the same namespace, and cross-namespace aliases are not supported. See [Namespaces](../concepts/namespaces.md). + +::: + Collection aliases allow you to create alternative names for your collections. This is useful for migrating between collections without downtime, A/B testing, or providing more convenient names for collections. An alias acts as a reference to a collection - when you query and manage objects using an alias name, Weaviate automatically routes the request to the target collection. import CollectionAliasUsage from "/_includes/collection-alias-usage.mdx"; diff --git a/docs/weaviate/manage-collections/multi-tenancy.mdx b/docs/weaviate/manage-collections/multi-tenancy.mdx index d0580fee..bd2dfc37 100644 --- a/docs/weaviate/manage-collections/multi-tenancy.mdx +++ b/docs/weaviate/manage-collections/multi-tenancy.mdx @@ -18,6 +18,12 @@ import SkipLink from "/src/components/SkipValidationLink"; Multi-tenancy provides data isolation. Each tenant is stored on a separate shard. Data stored in one tenant is not visible to another tenant. If your application serves many different users, multi-tenancy keeps their data private and makes database operations more efficient. +:::info Multi-tenancy vs namespaces + +Multi-tenancy and [namespaces](../configuration/namespaces.mdx) (`v1.38` Preview) are two different features that can be used together. Multi-tenancy isolates **data within one collection** (per-tenant shards). 
Namespaces isolate **collections across the cluster** (one customer = one namespace of collections). A multi-tenant collection inside a namespace works as expected. + +::: + :::info Tenant status renamed in `v1.26` In `v1.26`, the `HOT` status was renamed to `ACTIVE` and the `COLD` status was renamed to `INACTIVE`. ::: diff --git a/sidebars.js b/sidebars.js index b3200af6..6f7bacd6 100644 --- a/sidebars.js +++ b/sidebars.js @@ -525,6 +525,7 @@ const sidebars = { }, "weaviate/configuration/hnsw-snapshots", "weaviate/configuration/modules", + "weaviate/configuration/namespaces", { type: "doc", id: "weaviate/configuration/mcp-server", diff --git a/tests/docker-compose-namespaces.yml b/tests/docker-compose-namespaces.yml new file mode 100644 index 00000000..9b27c5e4 --- /dev/null +++ b/tests/docker-compose-namespaces.yml @@ -0,0 +1,37 @@ +--- +# Namespace-enabled instance for the `_includes/code/howto/namespaces.py` snippet. +# Namespaces require three boot invariants: NAMESPACES_ENABLED=true, +# DISABLE_GRAPHQL=true, and REPLICATION_MAXIMUM_FACTOR=1. The operator +# principal is a static API key (always classified as "global"), which the +# test exposes to the snippet via OPERATOR_API_KEY. +services: + weaviate_namespaces: + command: + - --host + - 0.0.0.0 + - --port + - '8080' + - --scheme + - http + image: cr.weaviate.io/semitechnologies/weaviate:1.38.0-rc.0 + ports: + - 8680:8080 + - 50651:50051 + restart: on-failure:0 + environment: + QUERY_DEFAULTS_LIMIT: 25 + PERSISTENCE_DATA_PATH: '/var/lib/weaviate' + CLUSTER_HOSTNAME: 'node1' + # Namespace prerequisites — validated together at boot. + NAMESPACES_ENABLED: 'true' + DISABLE_GRAPHQL: 'true' + REPLICATION_MAXIMUM_FACTOR: 1 + # Operator (global) principal: a static API key. 
+ AUTHENTICATION_APIKEY_ENABLED: 'true' + AUTHENTICATION_APIKEY_ALLOWED_KEYS: 'operator-api-key' + AUTHENTICATION_APIKEY_USERS: 'operator-user' + AUTHORIZATION_RBAC_ENABLED: 'true' + AUTHORIZATION_RBAC_ROOT_USERS: 'operator-user' + # Required so the operator can create namespaced DB users. + AUTHENTICATION_DB_USERS_ENABLED: 'true' +... diff --git a/tests/start-weaviate.sh b/tests/start-weaviate.sh index cd2b1f8c..17e9ed1f 100755 --- a/tests/start-weaviate.sh +++ b/tests/start-weaviate.sh @@ -7,6 +7,7 @@ nohup docker compose -f ./tests/docker-compose-anon.yml up -d nohup docker compose -f ./tests/docker-compose-anon-2.yml up -d nohup docker compose -f ./tests/docker-compose-anon-clip.yml up -d nohup docker compose -f ./tests/docker-compose-three-nodes.yml up -d +nohup docker compose -f ./tests/docker-compose-namespaces.yml up -d # Provision Keycloak (realm, client, users) so OIDC-based tests can fetch tokens. echo "Waiting for Keycloak (http://localhost:8081)..." diff --git a/tests/stop-weaviate.sh b/tests/stop-weaviate.sh index 3656f2ac..ee7bd6c9 100755 --- a/tests/stop-weaviate.sh +++ b/tests/stop-weaviate.sh @@ -7,3 +7,4 @@ docker compose -f ./tests/docker-compose-anon.yml down --timeout 30 docker compose -f ./tests/docker-compose-anon-2.yml down --timeout 30 docker compose -f ./tests/docker-compose-anon-clip.yml down --timeout 30 docker compose -f ./tests/docker-compose-three-nodes.yml down --timeout 30 +docker compose -f ./tests/docker-compose-namespaces.yml down --timeout 30 diff --git a/tests/test_python.py b/tests/test_python.py index f412afc5..b79f451b 100644 --- a/tests/test_python.py +++ b/tests/test_python.py @@ -1,8 +1,29 @@ +import os + import pytest import utils from pathlib import Path +def _client_supports_namespaces(): + """Return True when the installed client can run the namespace snippet, which needs the Python client from + weaviate-python-client#2033: typed `client.namespaces` + `Permissions.namespaces` + AND a `namespace=` argument on `users.db.create`. 
The API is still settling in the + open PR, so only the last two are probed — a missing one or a failing probe returns False (skip rather than hard-fail).""" + try: + import inspect + + from weaviate.rbac.models import Permissions + from weaviate.users.base import _UsersDBExecutor + + return ( + hasattr(Permissions, "namespaces") + and "namespace" in inspect.signature(_UsersDBExecutor.create).parameters + ) + except Exception: + return False + + def run_py_script(script_loc, custom_replace_pairs=None): if custom_replace_pairs: temp_proc_script_loc = utils.load_and_prep_temp_file( @@ -176,6 +197,28 @@ def test_modules(empty_weaviates, script_loc): run_py_script(script_loc) +# ========== Namespaces ========== + +@pytest.mark.pyv4 +@pytest.mark.skipif( + not _client_supports_namespaces(), + reason="Installed weaviate-client lacks namespace support (needs weaviate-python-client#2033)", +) +def test_namespaces(empty_weaviates): + # Runs against the namespace-enabled instance (docker-compose-namespaces.yml, + # ports 8680/50651), connecting as the static-API-key operator (global) principal. + os.environ["OPERATOR_API_KEY"] = "operator-api-key" + run_py_script( + "./_includes/code/howto/namespaces.py", + custom_replace_pairs=[ + [ + "weaviate.connect_to_local(", + "weaviate.connect_to_local(port=8680, grpc_port=50651, ", + ], + ], + ) + + # ========== Search ========== @pytest.mark.pyv4