diff --git a/_includes/code/howto/search.filters.nested.py b/_includes/code/howto/search.filters.nested.py new file mode 100644 index 00000000..d80e05ec --- /dev/null +++ b/_includes/code/howto/search.filters.nested.py @@ -0,0 +1,161 @@ +# Howto: Search -> Filters on nested object properties - Python examples. +# +# Preview feature: requires Weaviate v1.38+ with +# `WEAVIATE_PREVIEW_NESTED_FILTERING=on` set on the server. Released +# Weaviate versions reject `cars.make`-style nested paths at the filter +# parser. Not wired into pytest CI yet — promote at GA. + +import weaviate +from weaviate.classes.config import Configure, Property, DataType, Tokenization +from weaviate.classes.query import Filter + +client = weaviate.connect_to_local() + +client.collections.delete("Document") + +# Schema: Document.cars (object[]) -> tires (object[]). +# Mirrors the path patterns used by the worked examples below +# (cars.make, cars[0].make, cars.tires.width, ...). +client.collections.create( + name="Document", + vector_config=Configure.Vectors.self_provided(), + properties=[ + Property(name="title", data_type=DataType.TEXT, tokenization=Tokenization.FIELD), + Property( + name="cars", + data_type=DataType.OBJECT_ARRAY, + nested_properties=[ + Property(name="make", data_type=DataType.TEXT, tokenization=Tokenization.FIELD), + Property(name="color", data_type=DataType.TEXT, tokenization=Tokenization.FIELD), + Property( + name="tires", + data_type=DataType.OBJECT_ARRAY, + nested_properties=[ + Property(name="brand", data_type=DataType.TEXT, tokenization=Tokenization.FIELD), + Property(name="width", data_type=DataType.INT), + ], + ), + ], + ), + ], +) + +docs = client.collections.use("Document") +docs.data.insert_many([ + # Doc 1: two cars; (Toyota, red, 215-wide tires) + (Honda, blue, 205-wide tires) + {"title": "doc1", "cars": [ + {"make": "Toyota", "color": "red", + "tires": [{"brand": "Bridgestone", "width": 215}, + {"brand": "Bridgestone", "width": 215}]}, + {"make": "Honda", "color": "blue", + "tires": 
[{"brand": "Pirelli", "width": 205}, + {"brand": "Pirelli", "width": 205}]}, + ]}, + # Doc 2: one Toyota (blue); the "tires" key is omitted entirely + {"title": "doc2", "cars": [ + {"make": "Toyota", "color": "blue"}, + ]}, + # Doc 3: one Honda (red) with 250-wide Michelin tires + {"title": "doc3", "cars": [ + {"make": "Honda", "color": "red", + "tires": [{"brand": "Michelin", "width": 250}, + {"brand": "Michelin", "width": 250}]}, + ]}, +]) + + +# ========================================== +# ===== Existential match (any element) ===== +# ========================================== + +# START NestedExistential +# "any car has make = Toyota" — matches Doc 1 (first car) and Doc 2 (only car) +response = docs.query.fetch_objects( + # highlight-start + filters=Filter.by_property("cars.make").equal("Toyota"), + # highlight-end + return_properties=["title"], +) + +for o in response.objects: + print(o.properties) +# END NestedExistential + +assert len(response.objects) == 2 + + +# ========================================== +# ===== Positional match (cars[N]) ===== +# ========================================== + +# START NestedPositional +# "the FIRST car has make = Toyota" — matches Doc 1 and Doc 2; Doc 3's first car is a Honda, so it's excluded +response = docs.query.fetch_objects( + # highlight-start + filters=Filter.by_property("cars[0].make").equal("Toyota"), + # highlight-end + return_properties=["title"], +) +# END NestedPositional + +assert len(response.objects) == 2 + + +# ========================================== +# ===== Same-element AND across leaves ===== +# ========================================== + +# START NestedSameElementAnd +# "the SAME car is both Toyota AND red" — only Doc 1's first car qualifies. +# Without same-element correlation a doc with separate (Toyota, blue) and +# (Honda, red) cars would also match, which is wrong. 
+response = docs.query.fetch_objects( + # highlight-start + filters=( + Filter.by_property("cars.make").equal("Toyota") + & Filter.by_property("cars.color").equal("red") + ), + # highlight-end + return_properties=["title"], +) +# END NestedSameElementAnd + +assert len(response.objects) == 1 + + +# ========================================== +# ===== Recursive path (object[] inside object[]) ===== +# ========================================== + +# START NestedRecursive +# "any tire on any car is wider than 200" — Doc 1 (215 and 205) and Doc 3 (250); Doc 2 has no tires +response = docs.query.fetch_objects( + # highlight-start + filters=Filter.by_property("cars.tires.width").greater_than(200), + # highlight-end + return_properties=["title"], +) +# END NestedRecursive + +assert len(response.objects) == 2 + + +# ========================================== +# ===== IsNull on an intermediate object ===== +# ========================================== + +# START NestedIsNull +# "the first car has no tires" — only the Toyota in Doc 2 +response = docs.query.fetch_objects( + # highlight-start + filters=Filter.by_property("cars[0].tires").is_none(True), + # highlight-end + return_properties=["title"], +) +# END NestedIsNull + +assert len(response.objects) == 1 + + +client.collections.delete("Document") +client.close() diff --git a/docs/weaviate/api/graphql/filters.md b/docs/weaviate/api/graphql/filters.md index 85533951..7055aa67 100644 --- a/docs/weaviate/api/graphql/filters.md +++ b/docs/weaviate/api/graphql/filters.md @@ -149,7 +149,7 @@ Starting with `v1.12.0` you can configure your own [stopword lists for the inver ## Multiple operands -You can set multiple operands or [nest conditions](../../search/filters.md#nested-filters). +You can set multiple operands or [combine conditions with `And` / `Or`](../../search/filters.md#combine-filters-with-and-or-or). 
:::tip You can filter datetimes similarly to numbers, with the `valueDate` given as `string` in [RFC3339](https://datatracker.ietf.org/doc/rfc3339/) format. @@ -472,6 +472,61 @@ import GraphQLFiltersWhereBeaconCount from '/_includes/code/graphql.filters.wher +### By nested object property + +:::caution Preview feature + +Available from Weaviate `v1.38` as a preview, gated by `WEAVIATE_PREVIEW_NESTED_FILTERING=on` on the server. See [Filter on nested object properties](../../search/filters.md#filter-on-nested-object-properties) for the conceptual guide and worked examples. + +::: + +A `where` filter can target a leaf inside an [`object` / `object[]` property](../../config-refs/datatypes.md#object). The `path` is a **single-element array** containing a dotted path; `[N]` pins a segment to an array index. + +```graphql +# Any car has make = "Toyota" +{ + Get { + Document( + where: { + path: ["cars.make"] + operator: Equal + valueText: "Toyota" + } + ) { title } + } +} + +# The first car's third tire is a Bridgestone +{ + Get { + Document( + where: { + path: ["cars[0].tires[2].brand"] + operator: Equal + valueText: "Bridgestone" + } + ) { title } + } +} + +# Same-element correlation: the SAME car is both Toyota AND red +{ + Get { + Document( + where: { + operator: And + operands: [ + { path: ["cars.make"], operator: Equal, valueText: "Toyota" } + { path: ["cars.color"], operator: Equal, valueText: "red" } + ] + } + ) { title } + } +} +``` + +Don't confuse this with a [reference-path filter](#by-cross-references): a reference-path `path` has multiple elements traversing cross-references (`["inCity", "City", "name"]`), while a nested-path `path` is a **single element** with dots inside it (`["cars.make"]`). + ### By geo coordinates A special case of the `Where` filter is with geoCoordinates. This filter is only supported by the `Get{}` function. If you've set the `geoCoordinates` property type, you can search in an area based on kilometers. 
diff --git a/docs/weaviate/config-refs/datatypes.md b/docs/weaviate/config-refs/datatypes.md index 286a3c4b..eafcf082 100644 --- a/docs/weaviate/config-refs/datatypes.md +++ b/docs/weaviate/config-refs/datatypes.md @@ -534,10 +534,10 @@ The `object` type allows you to store nested data as a JSON object that can be n For example, a `Person` collection could have an `address` property as an object. It could in turn include nested properties such as `street` and `city`: -:::note Limitations -Currently, `object` and `object[]` datatype properties are not indexed and not vectorized. +:::note Indexing and filtering + +`object` and `object[]` properties are not vectorized. Only their scalar leaf values are stored in the inverted index. From Weaviate `v1.38`, you can filter on those nested leaves using a dotted path syntax; this is a preview feature that requires `WEAVIATE_PREVIEW_NESTED_FILTERING=on` on the server. See [Filter on nested object properties](../search/filters.md#filter-on-nested-object-properties). -Future plans include the ability to index nested properties, for example to allow for filtering on nested properties and vectorization options. ::: ### Examples diff --git a/docs/weaviate/manage-collections/collection-operations.mdx b/docs/weaviate/manage-collections/collection-operations.mdx index e920e093..5d61a4ec 100644 --- a/docs/weaviate/manage-collections/collection-operations.mdx +++ b/docs/weaviate/manage-collections/collection-operations.mdx @@ -380,6 +380,8 @@ This configuration for nested objects defines the following: } ``` +To filter on values inside nested objects, see [Filter on nested object properties](../search/filters.md#filter-on-nested-object-properties). +
diff --git a/docs/weaviate/search/filters.md b/docs/weaviate/search/filters.md index 6e645113..9de2dc8a 100644 --- a/docs/weaviate/search/filters.md +++ b/docs/weaviate/search/filters.md @@ -10,6 +10,7 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; import FilteredTextBlock from '@site/src/components/Documentation/FilteredTextBlock'; import PyCode from '!!raw-loader!/\_includes/code/howto/search.filters.py'; +import PyCodeNested from '!!raw-loader!/\_includes/code/howto/search.filters.nested.py'; import PyCodeV3 from '!!raw-loader!/\_includes/code/howto/search.filters-v3.py'; import JavaScriptCode from '!!raw-loader!/\_includes/code/howto/search.filters.ts'; import GoCode from '!!raw-loader!/\_includes/code/howto/go/docs/mainpkg/search-filters_test.go'; @@ -196,9 +197,9 @@ The output is like this:
-## Nested filters +## Combine filters with `And` or `Or` -You can group and nest filters. +Group and nest filter conditions with `And` and `Or` operators to express compound logic. @@ -1064,6 +1065,122 @@ This filter requires the [property null state](../config-refs/indexing/inverted- +## Filter on nested object properties + +:::caution Preview feature + +Available from Weaviate `v1.38` as a preview, gated by the `WEAVIATE_PREVIEW_NESTED_FILTERING=on` environment variable on the server. The path syntax and operator semantics are stable, but the on-disk encoding may change before GA — don't rely on persistent state from preview clusters carrying over to the GA release. At GA, the environment variable will be removed and the feature will be enabled unconditionally. + +::: + +[`object` and `object[]` properties](../config-refs/datatypes.md#object) carry their own nested schemas. To filter on a value inside a nested object, use a single dotted path that names each property from the top-level property down to the leaf you want to compare. + +Given a collection like this: + + + +The filter property is a single dotted path. The dot is the only separator. An optional `[N]` after any segment pins that segment to an array index (0-based). + +| Path | Meaning | +|---|---| +| `cars.make` | Any car's `make` (matches if **any** element of the `cars` array satisfies the condition) | +| `cars[0].make` | The first car's `make` (positional) | +| `cars.tires.width` | Any tire on any car (recursive across two `object[]` levels) | +| `cars[1].tires[2].brand` | The second car's third tire's `brand` (positional through nesting) | + +`[N]` on a segment requires that segment to be an `object[]` (array). Every intermediate segment must be `object` or `object[]` — you cannot pivot through a scalar. The leaf may be any supported scalar type or an array of one. + +### Match any element (default) + +A path without `[N]` markers matches if **any** element in the parent array satisfies the condition. 
+ + + + + + + +### Match by position + +Use `[N]` to pin a path segment to a specific array index. Indices are 0-based. + + + + + + + +### Same-element correlation across leaves + +Combining two leaf filters with `And` matches when **the same element** in the parent array satisfies both. A document with one car `(Toyota, blue)` and another `(Honda, red)` would not match `cars.make = "Toyota" AND cars.color = "red"` — both conditions must hold on the **same** car. + + + + + + + +### Deep / recursive paths + +`object[]` can nest inside `object[]` to any depth. Each segment in the dotted path traverses one level. + + + + + + + +### Check whether a nested object is absent + +Pointing a path at an `object` or `object[]` segment (rather than a scalar leaf) is only valid with `IsNull`, which asks whether that whole sub-object is present. + + + + + + + +### Limitations + +:::note + +- **Allowed leaf data types**: `text`, `int`, `number`, `boolean`, `date`, `uuid`, `blob`, `blobHash`, and their array variants. `geoCoordinates`, `phoneNumber`, and cross-references (`cref`) are not allowed inside nested objects. +- **`IndexFilterable` is required**: nested filtering uses the filterable inverted index on each leaf. `IndexRangeFilters` and `IndexSearchable` flags exist on nested-property definitions but are not yet exercised by the nested searcher — range filters on nested numeric leaves currently use the filterable bucket. +- **Tokenization matters**: nested `text` leaves use the same tokenization options as flat properties. For exact-match filters on names, codes, or identifiers, set `tokenization: field` on the leaf so the value is stored as a single token. +- **Reference-path vs nested-path**: a reference-path filter is a multi-element `Path` (`["inCity", "City", "name"]`) traversing cross-references; a nested-path filter is a **single-element** path with dots inside it (`["cars.make"]`). + +::: + ## Filter considerations ### Tokenization