From 192f84dc69b589ad648725d1722cf264377f697d Mon Sep 17 00:00:00 2001 From: Ivan Despot <66276597+g-despot@users.noreply.github.com> Date: Mon, 25 May 2026 14:47:23 +0200 Subject: [PATCH 1/2] Add docs --- _includes/code/howto/search.filters.nested.py | 161 ++++++++++++++++++ docs/weaviate/api/graphql/filters.md | 57 ++++++- docs/weaviate/config-refs/datatypes.md | 6 +- .../collection-operations.mdx | 2 + docs/weaviate/search/filters.md | 123 ++++++++++++- 5 files changed, 343 insertions(+), 6 deletions(-) create mode 100644 _includes/code/howto/search.filters.nested.py diff --git a/_includes/code/howto/search.filters.nested.py b/_includes/code/howto/search.filters.nested.py new file mode 100644 index 000000000..d80e05ec6 --- /dev/null +++ b/_includes/code/howto/search.filters.nested.py @@ -0,0 +1,161 @@ +# Howto: Search -> Filters on nested object properties - Python examples. +# +# Preview feature: requires Weaviate v1.38+ with +# `WEAVIATE_PREVIEW_NESTED_FILTERING=on` set on the server. Released +# Weaviate versions reject `cars.make`-style nested paths at the filter +# parser. Not wired into pytest CI yet — promote at GA. + +import weaviate +from weaviate.classes.config import Configure, Property, DataType, Tokenization +from weaviate.classes.query import Filter + +client = weaviate.connect_to_local() + +client.collections.delete("Document") + +# Schema: Document.cars (object[]) -> tires (object[]). +# Mirrors the path patterns used by the worked examples below +# (cars.make, cars[0].make, cars.tires.width, ...). 
+client.collections.create( + name="Document", + vector_config=Configure.Vectors.self_provided(), + properties=[ + Property(name="title", data_type=DataType.TEXT, tokenization=Tokenization.FIELD), + Property( + name="cars", + data_type=DataType.OBJECT_ARRAY, + nested_properties=[ + Property(name="make", data_type=DataType.TEXT, tokenization=Tokenization.FIELD), + Property(name="color", data_type=DataType.TEXT, tokenization=Tokenization.FIELD), + Property( + name="tires", + data_type=DataType.OBJECT_ARRAY, + nested_properties=[ + Property(name="brand", data_type=DataType.TEXT, tokenization=Tokenization.FIELD), + Property(name="width", data_type=DataType.INT), + ], + ), + ], + ), + ], +) + +docs = client.collections.use("Document") +docs.data.insert_many([ + # Doc 1: two cars; (Toyota, red) + (Honda, blue) + {"title": "doc1", "cars": [ + {"make": "Toyota", "color": "red", + "tires": [{"brand": "Bridgestone", "width": 215}, + {"brand": "Bridgestone", "width": 215}]}, + {"make": "Honda", "color": "blue", + "tires": [{"brand": "Pirelli", "width": 205}, + {"brand": "Pirelli", "width": 205}]}, + ]}, + # Doc 2: one Toyota, no tires + {"title": "doc2", "cars": [ + {"make": "Toyota", "color": "blue"}, + ]}, + # Doc 3: one Honda (red) with wide Michelin tires + {"title": "doc3", "cars": [ + {"make": "Honda", "color": "red", + "tires": [{"brand": "Michelin", "width": 250}, + {"brand": "Michelin", "width": 250}]}, + ]}, +]) + + +# ========================================== +# ===== Existential match (any element) ===== +# ========================================== + +# START NestedExistential +# "any car has make = Toyota" — matches Doc 1 (first car) and Doc 2 (only car) +response = docs.query.fetch_objects( + # highlight-start + filters=Filter.by_property("cars.make").equal("Toyota"), + # highlight-end + return_properties=["title"], +) + +for o in response.objects: + print(o.properties) +# END NestedExistential + +assert len(response.objects) == 2 + + +# 
========================================== +# ===== Positional match (cars[N]) ===== +# ========================================== + +# START NestedPositional +# "the FIRST car has make = Toyota" — Doc 3's first car is Honda, so it's excluded +response = docs.query.fetch_objects( + # highlight-start + filters=Filter.by_property("cars[0].make").equal("Toyota"), + # highlight-end + return_properties=["title"], +) +# END NestedPositional + +assert len(response.objects) == 2 + + +# ========================================== +# ===== Same-element AND across leaves ===== +# ========================================== + +# START NestedSameElementAnd +# "the SAME car is both Toyota AND red" — only Doc 1's first car qualifies. +# Without same-element correlation a doc with separate (Toyota, blue) and +# (Honda, red) cars would also match, which is wrong. +response = docs.query.fetch_objects( + # highlight-start + filters=( + Filter.by_property("cars.make").equal("Toyota") + & Filter.by_property("cars.color").equal("red") + ), + # highlight-end + return_properties=["title"], +) +# END NestedSameElementAnd + +assert len(response.objects) == 1 + + +# ========================================== +# ===== Recursive path (object[] inside object[]) ===== +# ========================================== + +# START NestedRecursive +# "any tire on any car is wider than 200" — Doc 1 (215) and Doc 3 (250) +response = docs.query.fetch_objects( + # highlight-start + filters=Filter.by_property("cars.tires.width").greater_than(200), + # highlight-end + return_properties=["title"], +) +# END NestedRecursive + +assert len(response.objects) == 2 + + +# ========================================== +# ===== IsNull on an intermediate object ===== +# ========================================== + +# START NestedIsNull +# "the first car has no tires" — only the Toyota in Doc 2 +response = docs.query.fetch_objects( + # highlight-start + filters=Filter.by_property("cars[0].tires").is_none(True), + # 
highlight-end + return_properties=["title"], +) +# END NestedIsNull + +assert len(response.objects) == 1 + + +client.collections.delete("Document") +client.close() diff --git a/docs/weaviate/api/graphql/filters.md b/docs/weaviate/api/graphql/filters.md index 855339515..7055aa676 100644 --- a/docs/weaviate/api/graphql/filters.md +++ b/docs/weaviate/api/graphql/filters.md @@ -149,7 +149,7 @@ Starting with `v1.12.0` you can configure your own [stopword lists for the inver ## Multiple operands -You can set multiple operands or [nest conditions](../../search/filters.md#nested-filters). +You can set multiple operands or [combine conditions with `And` / `Or`](../../search/filters.md#combine-filters-with-and-or-or). :::tip You can filter datetimes similarly to numbers, with the `valueDate` given as `string` in [RFC3339](https://datatracker.ietf.org/doc/rfc3339/) format. @@ -472,6 +472,61 @@ import GraphQLFiltersWhereBeaconCount from '/_includes/code/graphql.filters.wher +### By nested object property + +:::caution Preview feature + +Available from Weaviate `v1.38` as a preview, gated by `WEAVIATE_PREVIEW_NESTED_FILTERING=on` on the server. See [Filter on nested object properties](../../search/filters.md#filter-on-nested-object-properties) for the conceptual guide and worked examples. + +::: + +A `where` filter can target a leaf inside an [`object` / `object[]` property](../../config-refs/datatypes.md#object). The `path` is a **single-element array** containing a dotted path; `[N]` pins a segment to an array index. 
+ +```graphql +# Any car has make = "Toyota" +{ + Get { + Document( + where: { + path: ["cars.make"] + operator: Equal + valueText: "Toyota" + } + ) { title } + } +} + +# The first car's third tire is a Bridgestone +{ + Get { + Document( + where: { + path: ["cars[0].tires[2].brand"] + operator: Equal + valueText: "Bridgestone" + } + ) { title } + } +} + +# Same-element correlation: the SAME car is both Toyota AND red +{ + Get { + Document( + where: { + operator: And + operands: [ + { path: ["cars.make"], operator: Equal, valueText: "Toyota" } + { path: ["cars.color"], operator: Equal, valueText: "red" } + ] + } + ) { title } + } +} +``` + +Don't confuse this with a [reference-path filter](#by-cross-references): a reference-path `path` has multiple elements traversing cross-references (`["inCity", "City", "name"]`), while a nested-path `path` is a **single element** with dots inside it (`["cars.make"]`). + ### By geo coordinates A special case of the `Where` filter is with geoCoordinates. This filter is only supported by the `Get{}` function. If you've set the `geoCoordinates` property type, you can search in an area based on kilometers. diff --git a/docs/weaviate/config-refs/datatypes.md b/docs/weaviate/config-refs/datatypes.md index 286a3c4b5..f86018e52 100644 --- a/docs/weaviate/config-refs/datatypes.md +++ b/docs/weaviate/config-refs/datatypes.md @@ -534,10 +534,10 @@ The `object` type allows you to store nested data as a JSON object that can be n For example, a `Person` collection could have an `address` property as an object. It could in turn include nested properties such as `street` and `city`: -:::note Limitations -Currently, `object` and `object[]` datatype properties are not indexed and not vectorized. +:::note Indexing and filtering + +`object` and `object[]` properties are not vectorized — only their leaf scalars are stored in the inverted index. 
From Weaviate `v1.38` (preview), you can filter on nested-object leaves using a dotted path syntax; see [Filter on nested object properties](../search/filters.md#filter-on-nested-object-properties). -Future plans include the ability to index nested properties, for example to allow for filtering on nested properties and vectorization options. ::: ### Examples diff --git a/docs/weaviate/manage-collections/collection-operations.mdx b/docs/weaviate/manage-collections/collection-operations.mdx index e920e093c..5d61a4ecd 100644 --- a/docs/weaviate/manage-collections/collection-operations.mdx +++ b/docs/weaviate/manage-collections/collection-operations.mdx @@ -380,6 +380,8 @@ This configuration for nested objects defines the following: } ``` +To filter on values inside nested objects, see [Filter on nested object properties](../search/filters.md#filter-on-nested-object-properties). +
diff --git a/docs/weaviate/search/filters.md b/docs/weaviate/search/filters.md index 6e6451132..e2362eb88 100644 --- a/docs/weaviate/search/filters.md +++ b/docs/weaviate/search/filters.md @@ -10,6 +10,7 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; import FilteredTextBlock from '@site/src/components/Documentation/FilteredTextBlock'; import PyCode from '!!raw-loader!/\_includes/code/howto/search.filters.py'; +import PyCodeNested from '!!raw-loader!/\_includes/code/howto/search.filters.nested.py'; import PyCodeV3 from '!!raw-loader!/\_includes/code/howto/search.filters-v3.py'; import JavaScriptCode from '!!raw-loader!/\_includes/code/howto/search.filters.ts'; import GoCode from '!!raw-loader!/\_includes/code/howto/go/docs/mainpkg/search-filters_test.go'; @@ -196,9 +197,9 @@ The output is like this:
-## Nested filters +## Combine filters with `And` or `Or` -You can group and nest filters. +Group and nest filter conditions with `And` and `Or` operators to express compound logic. @@ -736,6 +737,124 @@ The output is like this: +## Filter on nested object properties + +:::caution Preview feature + +Available from Weaviate `v1.38` as a preview, gated by the `WEAVIATE_PREVIEW_NESTED_FILTERING=on` environment variable on the server. The path syntax and operator semantics are stable, but the on-disk encoding may change before GA — don't rely on persistent state from preview clusters carrying over to the GA release. The env var is removed at GA and the feature is enabled unconditionally. + +::: + +[`object` and `object[]` properties](../config-refs/datatypes.md#object) carry their own nested schemas. To filter on a value inside a nested object, use a single dotted path naming the path from the parent property down to the leaf you want to compare. + +Given a collection like this: + + + +### Path syntax + +The filter property is a single dotted path. The dot is the only separator. An optional `[N]` after any segment pins that segment to an array index (0-based). + +| Path | Meaning | +|---|---| +| `cars.make` | Any car's `make` (matches if **any** element of the `cars` array has it) | +| `cars[0].make` | The first car's `make` (positional) | +| `cars.tires.width` | Any tire on any car (recursive across two `object[]` levels) | +| `cars[1].tires[2].brand` | The second car's third tire's `brand` (positional through nesting) | + +`[N]` on a segment requires that segment to be an `object[]` (array). Every intermediate segment must be `object` or `object[]` — you cannot pivot through a scalar. The leaf may be any supported scalar type. + +### Match any element (default) + +A path without `[N]` markers matches if **any** element in the parent array satisfies the condition. + + + + + + + +### Match by position + +Use `[N]` to pin a path segment to a specific array index. 
Indices are 0-based. + + + + + + + +### Same-element correlation across leaves + +Combining two leaf filters with `And` matches when **the same element** in the parent array satisfies both. A document with one car `(Toyota, blue)` and another `(Honda, red)` would not match `cars.make = "Toyota" AND cars.color = "red"` — both conditions must hold on the **same** car. + + + + + + + +### Deep / recursive paths + +`object[]` can nest inside `object[]` to any depth. Each segment in the dotted path traverses one level. + + + + + + + +### Check whether a nested object is absent + +Pointing a path at an `object` or `object[]` segment (rather than a scalar leaf) is only valid with `IsNull`, which asks whether that whole sub-object is present. + + + + + + + +### Limitations + +:::note + +- **Allowed leaf data types**: `text`, `int`, `number`, `boolean`, `date`, `uuid`, `blob`, `blobHash`, and their array variants. `geoCoordinates`, `phoneNumber`, and cross-references (`cref`) are not allowed inside nested objects. +- **`IndexFilterable` is required**: nested filtering uses the filterable inverted index on each leaf. `IndexRangeFilters` and `IndexSearchable` flags exist on nested-property definitions but are not yet exercised by the nested searcher — range filters on nested numeric leaves currently use the filterable bucket. +- **Tokenization matters**: nested `text` leaves use the same tokenization options as flat properties. For exact-match filters on names, codes, or identifiers, set `tokenization: field` on the leaf so the value is stored as a single token. +- **Reference-path vs nested-path**: a reference-path filter is a multi-element `Path` (`["inCity", "City", "name"]`) traversing cross-references; a nested-path filter is a **single-element** path with dots inside it (`["cars.make"]`). They are independent — namespace-enabled cluster restrictions on reference-path filters do not apply to nested-path filters. 
+ +::: + ## By geo-coordinates import GeoLimitations from '/\_includes/geo-limitations.mdx'; From bdfb159fab2f98f526298b9175207312e8677a6e Mon Sep 17 00:00:00 2001 From: Ivan Despot <66276597+g-despot@users.noreply.github.com> Date: Thu, 28 May 2026 09:13:33 +0200 Subject: [PATCH 2/2] Update docs --- docs/weaviate/config-refs/datatypes.md | 2 +- docs/weaviate/search/filters.md | 234 ++++++++++++------------- 2 files changed, 117 insertions(+), 119 deletions(-) diff --git a/docs/weaviate/config-refs/datatypes.md b/docs/weaviate/config-refs/datatypes.md index f86018e52..eafcf0828 100644 --- a/docs/weaviate/config-refs/datatypes.md +++ b/docs/weaviate/config-refs/datatypes.md @@ -536,7 +536,7 @@ For example, a `Person` collection could have an `address` property as an object :::note Indexing and filtering -`object` and `object[]` properties are not vectorized — only their leaf scalars are stored in the inverted index. From Weaviate `v1.38` (preview), you can filter on nested-object leaves using a dotted path syntax; see [Filter on nested object properties](../search/filters.md#filter-on-nested-object-properties). +`object` and `object[]` properties are not vectorized — only their leaf scalars are stored in the inverted index. From Weaviate `v1.38` (preview), you can filter on nested-object leaves using a dotted path syntax. See [Filter on nested object properties](../search/filters.md#filter-on-nested-object-properties). ::: diff --git a/docs/weaviate/search/filters.md b/docs/weaviate/search/filters.md index e2362eb88..9de2dc8ad 100644 --- a/docs/weaviate/search/filters.md +++ b/docs/weaviate/search/filters.md @@ -737,124 +737,6 @@ The output is like this: -## Filter on nested object properties - -:::caution Preview feature - -Available from Weaviate `v1.38` as a preview, gated by the `WEAVIATE_PREVIEW_NESTED_FILTERING=on` environment variable on the server. 
The path syntax and operator semantics are stable, but the on-disk encoding may change before GA — don't rely on persistent state from preview clusters carrying over to the GA release. The env var is removed at GA and the feature is enabled unconditionally. - -::: - -[`object` and `object[]` properties](../config-refs/datatypes.md#object) carry their own nested schemas. To filter on a value inside a nested object, use a single dotted path naming the path from the parent property down to the leaf you want to compare. - -Given a collection like this: - - - -### Path syntax - -The filter property is a single dotted path. The dot is the only separator. An optional `[N]` after any segment pins that segment to an array index (0-based). - -| Path | Meaning | -|---|---| -| `cars.make` | Any car's `make` (matches if **any** element of the `cars` array has it) | -| `cars[0].make` | The first car's `make` (positional) | -| `cars.tires.width` | Any tire on any car (recursive across two `object[]` levels) | -| `cars[1].tires[2].brand` | The second car's third tire's `brand` (positional through nesting) | - -`[N]` on a segment requires that segment to be an `object[]` (array). Every intermediate segment must be `object` or `object[]` — you cannot pivot through a scalar. The leaf may be any supported scalar type. - -### Match any element (default) - -A path without `[N]` markers matches if **any** element in the parent array satisfies the condition. - - - - - - - -### Match by position - -Use `[N]` to pin a path segment to a specific array index. Indices are 0-based. - - - - - - - -### Same-element correlation across leaves - -Combining two leaf filters with `And` matches when **the same element** in the parent array satisfies both. A document with one car `(Toyota, blue)` and another `(Honda, red)` would not match `cars.make = "Toyota" AND cars.color = "red"` — both conditions must hold on the **same** car. 
- - - - - - - -### Deep / recursive paths - -`object[]` can nest inside `object[]` to any depth. Each segment in the dotted path traverses one level. - - - - - - - -### Check whether a nested object is absent - -Pointing a path at an `object` or `object[]` segment (rather than a scalar leaf) is only valid with `IsNull`, which asks whether that whole sub-object is present. - - - - - - - -### Limitations - -:::note - -- **Allowed leaf data types**: `text`, `int`, `number`, `boolean`, `date`, `uuid`, `blob`, `blobHash`, and their array variants. `geoCoordinates`, `phoneNumber`, and cross-references (`cref`) are not allowed inside nested objects. -- **`IndexFilterable` is required**: nested filtering uses the filterable inverted index on each leaf. `IndexRangeFilters` and `IndexSearchable` flags exist on nested-property definitions but are not yet exercised by the nested searcher — range filters on nested numeric leaves currently use the filterable bucket. -- **Tokenization matters**: nested `text` leaves use the same tokenization options as flat properties. For exact-match filters on names, codes, or identifiers, set `tokenization: field` on the leaf so the value is stored as a single token. -- **Reference-path vs nested-path**: a reference-path filter is a multi-element `Path` (`["inCity", "City", "name"]`) traversing cross-references; a nested-path filter is a **single-element** path with dots inside it (`["cars.make"]`). They are independent — namespace-enabled cluster restrictions on reference-path filters do not apply to nested-path filters. - -::: - ## By geo-coordinates import GeoLimitations from '/\_includes/geo-limitations.mdx'; @@ -1183,6 +1065,122 @@ This filter requires the [property null state](../config-refs/indexing/inverted- +## Filter on nested object properties + +:::caution Preview feature + +Available from Weaviate `v1.38` as a preview, gated by the `WEAVIATE_PREVIEW_NESTED_FILTERING=on` environment variable on the server. 
The path syntax and operator semantics are stable, but the on-disk encoding may change before GA — don't rely on persistent state from preview clusters carrying over to the GA release. At GA, the environment variable will be removed and the feature will be enabled unconditionally. + +::: + +[`object` and `object[]` properties](../config-refs/datatypes.md#object) carry their own nested schemas. To filter on a value inside a nested object, use a single dotted path that runs from the parent property down to the leaf you want to compare. + +Given a collection like this: + + +The filter property is a single dotted path. The dot is the only separator. An optional `[N]` after any segment pins that segment to an array index (0-based). + +| Path | Meaning | +|---|---| +| `cars.make` | Any car's `make` (matches if **any** element of the `cars` array satisfies the condition) | +| `cars[0].make` | The first car's `make` (positional) | +| `cars.tires.width` | Any tire's `width` on any car (recursive across two `object[]` levels) | +| `cars[1].tires[2].brand` | The second car's third tire's `brand` (positional through nesting) | + +`[N]` on a segment requires that segment to be an `object[]` (array). Every intermediate segment must be `object` or `object[]` — you cannot pivot through a scalar. The leaf may be any supported scalar type. + +### Match any element (default) + +A path without `[N]` markers matches if **any** element in the parent array satisfies the condition. + + + + + + + +### Match by position + +Use `[N]` to pin a path segment to a specific array index. Indices are 0-based. + + + + + + + +### Same-element correlation across leaves + +Combining two leaf filters with `And` matches when **the same element** in the parent array satisfies both. A document with one car `(Toyota, blue)` and another `(Honda, red)` would not match `cars.make = "Toyota" AND cars.color = "red"` — both conditions must hold on the **same** car.
+ + + + + + + +### Deep / recursive paths + +`object[]` can nest inside `object[]` to any depth. Each segment in the dotted path traverses one level. + + + + + + + +### Check whether a nested object is absent + +Pointing a path at an `object` or `object[]` segment (rather than a scalar leaf) is only valid with `IsNull`, which tests whether that whole sub-object is absent. + + + + + + + +### Limitations + +:::note + +- **Allowed leaf data types**: `text`, `int`, `number`, `boolean`, `date`, `uuid`, `blob`, `blobHash`, and their array variants. `geoCoordinates`, `phoneNumber`, and cross-references (`cref`) are not allowed inside nested objects. +- **`indexFilterable` is required**: nested filtering uses the filterable inverted index on each leaf. The `indexRangeFilters` and `indexSearchable` flags exist on nested-property definitions but are not yet exercised by the nested searcher — range filters on nested numeric leaves currently use the filterable bucket. +- **Tokenization matters**: nested `text` leaves use the same tokenization options as flat properties. For exact-match filters on names, codes, or identifiers, set `tokenization: field` on the leaf so the value is stored as a single token. +- **Reference-path vs nested-path**: a reference-path filter is a multi-element `Path` (`["inCity", "City", "name"]`) traversing cross-references; a nested-path filter is a **single-element** path with dots inside it (`["cars.make"]`). + +::: + ## Filter considerations ### Tokenization