Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
199 changes: 199 additions & 0 deletions _includes/code/howto/namespaces.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
# How-to: Manage namespaces — Python examples.
#
# Requires Weaviate v1.38.0+ with NAMESPACES_ENABLED=true, and the Python
# client release that adds namespace support (PR
# weaviate/weaviate-python-client#2033). Examples connect as a global
# (operator) principal; the `client.namespaces.*` endpoints are
# operator-only.

import os
import time

import weaviate
from weaviate.auth import Auth
from weaviate.rbac.models import Permissions


# ==========================================
# ===== Connect as the operator =====
# ==========================================

# START Connect
# The operator's API key is read from the OPERATOR_API_KEY environment
# variable and used to authenticate against the local instance.
operator_key = os.environ["OPERATOR_API_KEY"]
client = weaviate.connect_to_local(auth_credentials=Auth.api_key(operator_key))
# END Connect

# Clean up any leftovers from a previous run so the script is idempotent.
for cleanup_ns in ("customer1",):
    existing = client.namespaces.get(name=cleanup_ns)
    if existing is None:
        continue  # Nothing left over from an earlier run.
    # A namespace already in `deleting` state has its deletion in progress;
    # only request deletion when it has not been started yet.
    if existing.state != "deleting":
        client.namespaces.delete(name=cleanup_ns)
    # Wait (up to 60 s) for the two-phase cascade cleanup to finish. The
    # monotonic clock keeps the timeout immune to wall-clock adjustments.
    deadline = time.monotonic() + 60
    while (
        client.namespaces.get(name=cleanup_ns) is not None
        and time.monotonic() < deadline
    ):
        time.sleep(0.5)

for cleanup_role in ("namespaceUser", "namespace_admin", "all_namespace_admin"):
    try:
        client.roles.delete(role_name=cleanup_role)
    except Exception as exc:
        # Best-effort cleanup: report the failure instead of hiding it, but
        # do not abort the examples that follow.
        print(f"Skipping cleanup of role {cleanup_role!r}: {exc}")


# ==========================================
# ===== Create a namespace =====
# ==========================================

# START CreateNamespace
# Omit home_node to let the cluster pick automatically. To pin the
# namespace's shards to a specific node, pass home_node="<node-name>"
# where <node-name> is a current storage candidate from
# client.cluster.nodes().
ns = client.namespaces.create(name="customer1")
# Print the name, home node, and state of the namespace that was returned.
print(ns.name, ns.home_node, ns.state)
# → customer1 <home-node> active
# END CreateNamespace


# ==========================================
# ===== Get a single namespace =====
# ==========================================

# START GetNamespace
# Check the result for None before reading its attributes; the example
# treats None as "namespace not found".
ns = client.namespaces.get(name="customer1")
if ns is None:
    print("not found")
else:
    print(ns.name, ns.home_node, ns.state)
# END GetNamespace


# ==========================================
# ===== List all namespaces =====
# ==========================================

# START ListNamespaces
# Print the name and state of every namespace returned by list_all().
for ns in client.namespaces.list_all():
    print(ns.name, ns.state)
# END ListNamespaces


# ==========================================
# ===== Update the home node =====
# ==========================================

# START UpdateNamespace
# Changing home_node applies to future shard placements only; shards that
# already exist stay where they are. The new home node must be a storage
# candidate reported by client.cluster.nodes() — this example simply takes
# the first node in that list.
cluster_nodes = client.cluster.nodes()
target_node = cluster_nodes[0].name
client.namespaces.update(
    name="customer1",
    home_node=target_node,
)
# END UpdateNamespace


# ==========================================
# ===== Create a role for namespaced principals =====
# ==========================================

# START NamespaceUserRole
# The role is defined once, globally. At request time the RBAC matcher
# expands the `*` collection pattern within the caller's own namespace, so
# role definitions must NOT hard-code a `customer1:` prefix.
collection_permissions = Permissions.collections(
    collection="*",
    create_collection=True,
    read_config=True,
    update_config=True,
    delete_collection=True,
)
data_permissions = Permissions.data(
    collection="*",
    create=True,
    read=True,
    update=True,
    delete=True,
)
client.roles.create(
    role_name="namespaceUser",
    permissions=collection_permissions + data_permissions,
)
# END NamespaceUserRole


# ==========================================
# ===== Create a namespaced DB user =====
# ==========================================

# START CreateNamespacedUser
# Bind the new DB user to a namespace via the `namespace=` argument. The user
# is stored internally as `customer1:api_user` and can only see resources in
# `customer1`.
api_key = client.users.db.create(user_id="api_user", namespace="customer1")
# The returned API key is a credential: keep it in a secret store and do not
# write it to stdout or logs.
print("Created DB user customer1:api_user")
# END CreateNamespacedUser


# ==========================================
# ===== Assign the role to the namespaced user =====
# ==========================================

# START AssignNamespaceUserRole
# The operator refers to a namespaced user by its fully-qualified internal
# name, i.e. `<namespace>:<user_id>`.
qualified_user_id = "customer1:api_user"
client.users.db.assign_roles(
    user_id=qualified_user_id,
    role_names=["namespaceUser"],
)
# END AssignNamespaceUserRole


# ==========================================
# ===== What the namespaced user sees =====
# ==========================================

# START NamespacedUserView
# Connect as the namespaced user (using the api_key from when the user
# was created) and create a collection with a SHORT name — no
# `customer1:` prefix.
ns_client = weaviate.connect_to_local(
    auth_credentials=Auth.api_key(api_key),
)
try:
    ns_client.collections.create(name="Movies")

    # Listing the schema strips the namespace prefix:
    for name in ns_client.collections.list_all():
        print(name)
    # → Movies
finally:
    # Release the connection even if one of the calls above fails.
    ns_client.close()
# END NamespacedUserView


# ==========================================
# ===== Define a role with namespace-management permissions =====
# ==========================================

# START NamespacePermissions
# The operator-only `manage_namespaces` RBAC permission governs CRUD on
# /v1/namespaces. It can target one namespace or every namespace (`*`).

# Single namespace — manage `customer1` only
customer1_admin_permissions = Permissions.namespaces(
    namespace="customer1",
    manage=True,
)
client.roles.create(
    role_name="namespace_admin",
    permissions=customer1_admin_permissions,
)

# Wildcard — manage any namespace
wildcard_admin_permissions = Permissions.namespaces(
    namespace="*",
    manage=True,
)
client.roles.create(
    role_name="all_namespace_admin",
    permissions=wildcard_admin_permissions,
)
# END NamespacePermissions


# ==========================================
# ===== Delete a namespace =====
# ==========================================

# START DeleteNamespace
# Two-phase: the namespace flips to `state: deleting`, then a background
# cascade removes its DB users, aliases, and collections. Idempotent —
# repeated calls during cleanup return without error.
client.namespaces.delete(name="customer1")
# END DeleteNamespace


# Close the operator connection opened at the top of the script.
client.close()
5 changes: 5 additions & 0 deletions _includes/feature-notes/namespaces.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
:::caution Preview — added in `v1.38`

This is a preview feature. The API may change in future releases.

:::
20 changes: 20 additions & 0 deletions _includes/namespaces-overview.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
A **namespace** is a cluster-level isolation boundary: every collection and alias belongs to **exactly one** namespace, so one physical Weaviate cluster can host many isolated logical "customer clusters" without anything leaking between them.

For example, a user in namespace `customer1` creates a collection called `Movies`. The cluster stores it under the qualified name `customer1:Movies`, but that user only ever sees the short name `Movies`. A user in another namespace can create their own `Movies` collection without collision.

Each namespace is an entity with three attributes:

- A **name** (`customer1`) — immutable, 3–36 characters, consisting of lowercase ASCII letters, digits, and hyphens.
- A **home node** that every collection in the namespace pins all its shards to (each namespace runs at replication factor 1).
- A **state**, either `active` or `deleting`.

On a namespace-enabled cluster the `:` character is **reserved** — collection, alias, role, and user IDs cannot contain it, so the split between a namespace and a short name is always unambiguous.

Terminology used throughout these pages:

- **Principal** — an authenticated identity making a request. On a namespace-enabled cluster every principal is either *namespaced* or *global*.
- **Namespaced principal** — a caller bound to a single namespace (a dynamic DB user, or an OIDC user whose token carries a namespace claim). Sees and submits short names like `Movies`.
- **Global principal** — a cluster-wide caller (a static API key, or an OIDC user with the global claim). Sees and must submit fully-qualified names like `customer1:Movies`.
- **Operator** — a global principal with administrative permissions (such as `manage_namespaces`) who creates and manages namespaces and their users.
- **Name resolution** — how Weaviate maps between the short names a namespaced principal uses (`Movies`) and the qualified names stored on the cluster (`customer1:Movies`), stripping the prefix from responses automatically.
- **Home node** — the single node a namespace's shards are pinned to.
5 changes: 5 additions & 0 deletions docs/deploy/configuration/env-vars/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import APITable from '@site/src/components/APITable';
| `DEFAULT_SHARDING_COUNT` | Default `desiredCount` for new single-tenant collections, used when the collection definition does not specify one. An explicit `desiredCount` in the class creation request still takes precedence. A value of `0` (default) uses the cluster node count. Multi-tenant collections are unaffected. Must be `<= 512`. Runtime-configurable. Default: `0`<br/>Added in `v1.37` | `string - number` | `12` |
| `DEFAULT_VECTORIZER_MODULE` | Default vectorizer module - can be overridden by the vectorizer in the collection definition. | `string` | `text2vec-contextionary` |
| `API_BASED_MODULES_DISABLED` | Weaviate automatically enables the usage of all [API based modules](../../../weaviate/model-providers/index.md#api-based). Set this variable to `true` in order to limit access and only allow specific modules through the [`ENABLE_MODULES`](#ENABLE_MODULES) variable. Default: `false`<br/> Added in `v1.33` | `boolean` | `true` |
| `DISABLE_GRAPHQL` | If `true`, disable the GraphQL API cluster-wide; use the REST and gRPC APIs instead. Required when [namespaces](/weaviate/concepts/namespaces.md) are enabled. Default: `false` | `boolean` | `true` |
| `DISABLE_LAZY_LOAD_SHARDS` | When `false`, enable lazy shard loading to improve mean time to recovery in multi-tenant deployments. **Deprecated in `v1.36.6`.** Use `LAZY_LOAD_SHARD_COUNT_THRESHOLD` and `LAZY_LOAD_SHARD_SIZE_THRESHOLD_GB` instead. Weaviate now auto-detects when lazy loading is needed per collection. | `string` | `false` |
| `DISABLE_TELEMETRY` | Disable [telemetry](/deploy/configuration/telemetry.md) data collection | `boolean` | `false` |
| `DISK_USE_READONLY_PERCENTAGE` | If disk usage is higher than the given percentage all shards on the affected node will be marked as `READONLY`, meaning all future write requests will fail. See [Disk Pressure Warnings and Limits for details](/deploy/configuration/persistence.md#disk-pressure-warnings-and-limits). | `string - number` | `90` |
Expand Down Expand Up @@ -72,6 +73,7 @@ import APITable from '@site/src/components/APITable';
| `MEMORY_READONLY_PERCENTAGE` | If memory usage is higher than the given percentage all shards on the affected node will be marked as `READONLY`, meaning all future write requests will fail. (Default: `0` - i.e. no limit) | `string - number` | `75` |
| `MEMORY_WARNING_PERCENTAGE` | If memory usage is higher than the given percentage a warning will be logged by all shards on the affected node's disk. (Default: `0` - i.e. no limit) | `string - number` | `85` |
| `MODULES_CLIENT_TIMEOUT` | Timeout for requests to Weaviate modules. Default: `50s` | `string - duration` | `5s`, `10m`, `1h` |
| `NAMESPACES_ENABLED` | If `true`, enable cluster-level [namespace](/weaviate/concepts/namespaces.md) isolation (`v1.38` Preview). Requires `DISABLE_GRAPHQL=true` and `REPLICATION_MAXIMUM_FACTOR=1`. Supported only on new clusters — cannot be enabled on a cluster with pre-existing non-namespaced collections, and a cluster that has run with it `true` cannot be downgraded. Default: `false` <br/>Added in `v1.38` | `boolean` | `true` |
| `OBJECTS_TTL_BATCH_SIZE` | Number of objects deleted per batch during TTL cleanup. With the default pause settings, a pause occurs every `OBJECTS_TTL_BATCH_SIZE * OBJECTS_TTL_PAUSE_EVERY_NO_BATCHES` objects (100,000 by default). Can be modified at runtime. Default: `10000` <br/>Added in `v1.36` | `string - number` | `10000` |
| `OBJECTS_TTL_CONCURRENCY_FACTOR` | Controls the concurrency of the TTL deletion process as a multiplier. Higher values use more resources but delete faster. Must be greater than 0. Can be modified at runtime. Default: `1` <br/>Added in `v1.36` | `string - number` | `1` |
| `OBJECTS_TTL_DELETE_SCHEDULE` | Schedule for deleting expired objects. Accepts standard 5-field cron format, 6-field (with seconds), 7-field (with seconds and year), descriptors (`@yearly`, `@monthly`, `@weekly`, `@daily`, `@hourly`), or hash expressions. Default: `""` (disabled) <br/>Added in `v1.36` | `string - cron format` | `0 */6 * * *` (every 6 hours) |
Expand Down Expand Up @@ -184,6 +186,8 @@ For more information on authentication and authorization, see the [Authenticatio
| `AUTHENTICATION_OIDC_SCOPES` | OIDC scopes to request | `string - comma-separated list` | `openid,email` |
| `AUTHENTICATION_OIDC_SKIP_CLIENT_ID_CHECK` | Skip OIDC Client ID check | `boolean` | `false` |
| `AUTHENTICATION_OIDC_USERNAME_CLAIM` | OIDC Username Claim | `string` | `email` |
| `OIDC_NAMESPACE_CLAIM` | On namespace-enabled clusters (`NAMESPACES_ENABLED=true`), name of the OIDC token claim that carries the principal's namespace. See [Namespaces — OIDC classification](/weaviate/concepts/namespaces.md#oidc-classification). <br/>Added in `v1.38` | `string` | `namespace` |
| `OIDC_GLOBAL_PRINCIPAL_CLAIM` | On namespace-enabled clusters, name of the OIDC token claim that marks the principal as global (boolean). Mutually exclusive with `OIDC_NAMESPACE_CLAIM` on a per-token basis — see [Namespaces — OIDC classification](/weaviate/concepts/namespaces.md#oidc-classification). <br/>Added in `v1.38` | `string` | `is_global` |
| `AUTHORIZATION_ADMINLIST_ENABLED` | Enable AdminList authorization scheme (mutually exclusive with `AUTHORIZATION_RBAC_ENABLED`) | `boolean` | `true` |
| `AUTHORIZATION_ADMINLIST_USERS` | Users with admin permission when AdminList scheme used | `string - comma-separated list` | `jane@example.com,john@example.com` |
| `AUTHORIZATION_ADMINLIST_READONLY_USERS` | Users with read-only permission when AdminList scheme used | `string - comma-separated list` | `alice@example.com,dave@example.com` |
Expand Down Expand Up @@ -236,6 +240,7 @@ For more information on authentication and authorization, see the [Authenticatio
| `REPLICA_MOVEMENT_MINIMUM_ASYNC_WAIT` | How long replica movement waits after file copy but before finalizing the move in order for in progress writes to finish. Default: `60` seconds <br/>Added in `v1.32` | `string - number` | `90` |
| `REPLICATED_INDICES_REQUEST_QUEUE_ENABLED` | Enable/disable the request queue buffer for replicated indices in multi-node clusters. Can be modified at runtime. Default: `false` | `boolean` | `true` |
| `REPLICATION_ENGINE_MAX_WORKERS` | The number of workers to process replica movements in parallel. Default: `10` <br/>Added in `v1.32` | `string - number` | `5` |
| `REPLICATION_MAXIMUM_FACTOR` | The maximum replication factor for all collections in the cluster. | `string - number` | `1` |
| `REPLICATION_MINIMUM_FACTOR` | The minimum replication factor for all collections in the cluster. | `string - number` | `3` |

```mdx-code-block
Expand Down
18 changes: 18 additions & 0 deletions docs/deploy/configuration/oidc.md
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,24 @@ Configuring the OIDC token issuer is outside the scope of Weaviate's configurati
By default, Weaviate validates that the token includes a specified client id in the audience claim. If your token issuer does not support this feature, you can turn it off as outlined in the [authentication configuration](./authentication.md#oidc-docker).
:::

## OIDC on namespace-enabled clusters

On clusters with `NAMESPACES_ENABLED=true` ([namespaces](/weaviate/concepts/namespaces.md)), every OIDC token is classified as either a **namespaced** or **global** principal. Two env vars select which claims drive the classification:

| Env var | Purpose |
|---|---|
| `OIDC_NAMESPACE_CLAIM` | Name of the claim that holds the namespace string (e.g. `namespace`). |
| `OIDC_GLOBAL_PRINCIPAL_CLAIM` | Name of the claim that holds the global-principal boolean (e.g. `is_global`). |

A token must select exactly one classification. The server **rejects** tokens that:

- Carry both a namespace claim and the global-principal claim set to `true`.
- Carry neither claim on a namespace-enabled cluster.
- Name a namespace that doesn't exist (Weaviate never auto-creates).
- Combine a namespace claim with `root` membership granted through `AUTHORIZATION_RBAC_ROOT_GROUPS` or `AUTHORIZATION_RBAC_ROOT_USERS` — `root` is cluster-global and cannot coexist with a namespace.

On clusters where `NAMESPACES_ENABLED=false`, presence of either claim in the token causes the request to be rejected. See [Namespaces — OIDC classification](/weaviate/concepts/namespaces.md#oidc-classification) for the full rules.

## Questions and feedback

import DocsFeedback from '/_includes/docs-feedback.mdx';
Expand Down
6 changes: 6 additions & 0 deletions docs/weaviate/concepts/data.md
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,12 @@ For details on configuring your schema, see the [schema tutorial](../starter-gui

To separate data within a cluster, use multi-tenancy. Weaviate partitions the cluster into shards. Each shard holds data for a single tenant.

:::info Multi-tenancy is different from namespaces

[Multi-tenancy](../manage-collections/multi-tenancy.mdx) isolates **data within one collection**. [Namespaces](./namespaces.md) isolate **collections across the cluster**. They're orthogonal — a multi-tenant collection inside a namespace works as expected.

:::

```mermaid
%%{init: {'theme': 'base', 'themeVariables': { 'background': '#f5f5f5' }}}%%
flowchart TB
Expand Down
4 changes: 4 additions & 0 deletions docs/weaviate/concepts/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ import AcademyAdmonition from '@site/src/components/AcademyAdmonition';

- Read more about Weaviate's vector quantization options.

**[Namespaces](./namespaces.md)**

- Cluster-level isolation where every collection and alias belongs to exactly one namespace, letting one cluster host many isolated logical "customer clusters".

## Weaviate Architecture

The figure below gives a 30,000-foot view of Weaviate's architecture.
Expand Down
Loading
Loading