From 5c6f9d73444020c7edee5a083dce6e2ced6879a8 Mon Sep 17 00:00:00 2001 From: Petra Selmer Date: Wed, 22 Jun 2016 17:00:01 +0100 Subject: [PATCH 01/27] Added the nested subqueries CIP --- cip/CIP2016-06-22-nested-subqueries.adoc | 257 +++++++++++++++++++++++ 1 file changed, 257 insertions(+) create mode 100644 cip/CIP2016-06-22-nested-subqueries.adoc diff --git a/cip/CIP2016-06-22-nested-subqueries.adoc b/cip/CIP2016-06-22-nested-subqueries.adoc new file mode 100644 index 0000000000..835e1da51f --- /dev/null +++ b/cip/CIP2016-06-22-nested-subqueries.adoc @@ -0,0 +1,257 @@ += CIP2016-06-22 - Nested subqueries +:numbered: +:toc: +:toc-placement: macro +:source-highlighter: codemirror + +*Authors:* Petra Selmer , Stefan Plantikow + +[abstract] +.Abstract +-- +This CIP proposes the incorporation of nested subqueries to Cypher. +-- + +toc::[] + + +== Motivation + +Subqueries - i.e. queries within queries - are a powerful and expressive feature allowing for: + + * Increased query expressivity + * Better query construction and readability + * Easier query composition and reuse + * Post-processing as a single unit results from multiple queries + +== Background + +This CIP may be viewed in conjunction with the EXISTS CIP and the Pattern Comprehension CIP, both of which propose variants of subqueries. + + +== Proposal + +We propose the addition of new syntax to the `MATCH` clause for expressing nested subqueries. + +Nested subqueries are self-contained, read-only Cypher queries. + +A nested subquery is denoted using the following syntax: `MATCH { }`. + +Nested subqueries may be correlated - i.e. the subquery has a dependency on the outer query - or uncorrelated. + +As this proposal extends the `MATCH` clause, nested subqueries can be contained within other nested subqueries at arbitrary depth. + +=== Syntax + +We extend the https://github.com/opencypher/openCypher/blob/master/grammar/cypher.xml[grammar] by adding a new clause. 
+ +Extend expressions to support string search operators: +[source, ebnf] +---- +nested-subquery-clause = "MATCH", "{", RegularQuery, "}" ; +---- + +=== Semantic clarification + +Conceptually, a nested subquery is evaluated for each incoming record and may produce an arbitrary number of result records. + +All incoming variables remain in scope. + +Any new variable bindings produced by evaluating the subquery will augment the variable bindings of the initial record; i.e. nested subqueries behave in the same way as `UNWIND` and `CALL` with regard to the introduction of new variable bindings. + +Subqueries interact with write clauses in the same manner as `MATCH`. + +It is an error for a nested subquery to try to rebind (shadow) a pre-existing outer variable binding. + +=== Examples + +Post-UNION processing: +[source, cypher] +---- +MATCH { + // authored tweets + MATCH (me:User {name: 'Alice'})-[:FOLLOWS]->(user:User), + (user)<-[:AUTHORED]-(tweet:Tweet) + RETURN tweet, tweet.time AS time, user.country AS country + UNION + // favorited tweets + MATCH (me:User {name: 'Alice'})-[:FOLLOWS]->(user:User), + (user)<-[:HAS_FAVOURITE]-(favorite:Favorite)-[:TARGETS]->(tweet:Tweet) + RETURN tweet, favourite.time AS time, user.country AS country +} +WHERE country = "se" +RETURN DISTINCT tweet +ORDER BY time DESC +LIMIT 10 +---- + +Uncorrelated nested subquery: +[source, cypher] +---- +MATCH (f:Farm {id: {farmId}) +MATCH { + MATCH (u:User {id: {userId}})-[:LIKES]->(b:Brand), + (b)-[:PRODUCES]->(p:Lawnmower) + RETURN b.name AS name, p.code AS code + UNION + MATCH (u:User {id: {userId}})-[:LIKES]->(b:Brand), + (b)-[:PRODUCES]->(v:Vehicle), + (v)<-[:IS_A]-(:Category {name: 'Tractor'}) + RETURN b.name AS name, p.code AS code +} +RETURN f, name, code +---- + +Correlated nested subquery: +[source, cypher] +---- +MATCH (f:Farm {id: {farmId})-[:IS_IN]->(country:Country) +MATCH { + MATCH (u:User {id: {userId}})-[:LIKES]->(b:Brand), + (b)-[:PRODUCES]->(p:Lawnmower) + RETURN b.name AS name, 
p.code AS code + UNION + MATCH (u:User {id: {userId}})-[:LIKES]->(b:Brand), + (b)-[:PRODUCES]->(v:Vehicle), + (v)<-[:IS_A]-(:Category {name: 'Tractor'}) + WHERE v.leftHandDrive = country.leftHandDrive + RETURN b.name AS name, p.code AS code +} +RETURN f, name, code +---- + +Filtered and correlated nested subquery: +[source, cypher] +---- +MATCH (f:Farm)-[:IS_IN]->(country:Country) +WHERE country.name IN {countryNames} +MATCH { + MATCH (u:User {id: {userId}})-[:LIKES]->(b:Brand), + (b)-[:PRODUCES]->(p:Lawnmower) + RETURN b AS brand, p.code AS code + UNION + MATCH (u:User {id: {userId}})-[:LIKES]->(b:Brand), + (b)-[:PRODUCES]->(v:Vehicle), + (v)<-[:IS_A]-(:Category {name: 'Tractor'}) + WHERE v.leftHandDrive = country.leftHandDrive + RETURN b AS brand, p.code AS code +} +WHERE f.type = 'organic' + AND b.certified +RETURN f, brand.name AS name, code +---- + +Doubly-nested subquery: +[source, cypher] +---- +MATCH (f:Farm {id: {farmId}}) +MATCH { + MATCH (c:Customer)-[:BUYS_FOOD_AT]->(f) + MATCH { + MATCH (c)-[:RETWEETS]->(t:Tweet)<-[:TWEETED_BY]-(f) + RETURN c, count(*) AS count + UNION + MATCH (c)-[:LIKES]->(p:Posting)<-[:POSTED_BY]-(f) + RETURN c, count(*) AS count + } + RETURN c, "customer" AS type, sum(count) AS endorsement + UNION + MATCH (s:Shop)-[:BUYS_FOOD_AT]->(f) + MATCH (s)-[:PLACES]->(a:Advertisement)-[:ABOUT]->(f) + RETURN s, "shop" AS type, count(a) * 100 AS endorsement +} +RETURN f.name AS name, type, sum(endorsement) AS endorsement +---- + +=== Interaction with existing features + +Nested subqueries do not interact directly with any existing features. + +=== Alternatives + +Alternative syntax has been considered during the gestation of this document: + + * Using round braces; i.e. 
`MATCH (...)` + * Using alternative keywords: + + ** `SUBQUERY` + ** `QUERY` + +== What others do + +=== SQL + +The following types of subqueries are supported in SQL: + +Scalar: +[source, sql] +---- +SELECT orderID +FROM Orders +WHERE orderID = + (SELECT max(orderID) FROM Orders) +---- + +Multi-valued: +[source, sql] +---- +SELECT customerID +FROM Customers +WHERE customerID IN + (SELECT customerID FROM Orders) +---- + +Correlated: +[source, sql] +---- +SELECT orderID, customerID +FROM Orders AS O1 +WHERE orderID = + (SELECT max(O2.orderID) FROM Orders AS O2 + WHERE O2.customerID = O1.customerID) +---- + +Table-valued/table expression: +[source, sql] +---- +SELECT orderYear +FROM + (SELECT YEAR(orderDate) AS orderYear + FROM Orders) AS D +---- + +Both scalar and table expression subqueries are out of scope for the purposes of this CIP. They will be addressed in forthcoming CIPs. + +=== SPARQL + +https://www.w3.org/TR/2013/REC-sparql11-query-20130321/#subqueries[SPARQL] only supports uncorrelated subqueries, exemplified by: + +[source, cypher] +---- +SELECT ?y ?minName +WHERE { + :alice :knows ?y . + { + SELECT ?y (MIN(?name) AS ?minName) + WHERE { + ?y :name ?name . + } GROUP BY ?y + } +} +---- + +Owing to the bottom-up nature of SPARQL query evaluation, the subqueries are evaluated logically first, and the results are projected up to the outer query. + +Only variables projected out of the subquery will be visible, or in scope, to the outer query. + + +== Benefits to this proposal + +* Increasing the expressivity of the language. +* Allowing unified post-processing on results from multiple (sub)queries; this is exemplified by the https://github.com/neo4j/neo4j/issues/2725[request for post-UNION processing]. +* Facilitating query readability, construction and maintainability. +* Providing a feature familiar to users of SQL. + +== Caveats to this proposal + +At the current time, we are not aware of any caveats. 
From d7d0a830db4a87e90fee4e60b85e3792c09219d6 Mon Sep 17 00:00:00 2001 From: Petra Selmer Date: Thu, 23 Jun 2016 10:41:18 +0100 Subject: [PATCH 02/27] Sundry content edits to the subquery CIP --- cip/CIP2016-06-22-nested-subqueries.adoc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cip/CIP2016-06-22-nested-subqueries.adoc b/cip/CIP2016-06-22-nested-subqueries.adoc index 835e1da51f..3ec6cb096b 100644 --- a/cip/CIP2016-06-22-nested-subqueries.adoc +++ b/cip/CIP2016-06-22-nested-subqueries.adoc @@ -9,7 +9,7 @@ [abstract] .Abstract -- -This CIP proposes the incorporation of nested subqueries to Cypher. +This CIP proposes the incorporation of nested subqueries into Cypher. -- toc::[] @@ -43,9 +43,8 @@ As this proposal extends the `MATCH` clause, nested subqueries can be contained === Syntax -We extend the https://github.com/opencypher/openCypher/blob/master/grammar/cypher.xml[grammar] by adding a new clause. +We extend the https://github.com/opencypher/openCypher/blob/master/grammar/cypher.xml[grammar] through the addition of a new clause: -Extend expressions to support string search operators: [source, ebnf] ---- nested-subquery-clause = "MATCH", "{", RegularQuery, "}" ; @@ -169,7 +168,7 @@ Nested subqueries do not interact directly with any existing features. === Alternatives -Alternative syntax has been considered during the gestation of this document: +Alternative syntax has been considered during the production of this document: * Using round braces; i.e. 
`MATCH (...)` * Using alternative keywords: From bf71712ac4db89eba92b9b6e39d8c1bdcfee2ba3 Mon Sep 17 00:00:00 2001 From: Petra Selmer Date: Thu, 23 Jun 2016 15:38:30 +0100 Subject: [PATCH 03/27] Clarified the syntax wrt `OPTIONAL` --- cip/CIP2016-06-22-nested-subqueries.adoc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cip/CIP2016-06-22-nested-subqueries.adoc b/cip/CIP2016-06-22-nested-subqueries.adoc index 3ec6cb096b..686e99361b 100644 --- a/cip/CIP2016-06-22-nested-subqueries.adoc +++ b/cip/CIP2016-06-22-nested-subqueries.adoc @@ -43,11 +43,13 @@ As this proposal extends the `MATCH` clause, nested subqueries can be contained === Syntax -We extend the https://github.com/opencypher/openCypher/blob/master/grammar/cypher.xml[grammar] through the addition of a new clause: +We extend the https://github.com/opencypher/openCypher/blob/master/grammar/cypher.xml[grammar] as follows: [source, ebnf] ---- -nested-subquery-clause = "MATCH", "{", RegularQuery, "}" ; +Match = [ "OPTIONAL" ], ( MatchPattern | NestedSubquery ), [ "WHERE", Predicate ] ; +MatchPattern = "MATCH", Pattern ; +NestedSubquery = "MATCH", "{", RegularQuery, "}" ; ---- === Semantic clarification From f6245fd48a74c0eca5e0d36d3b025c64be711133 Mon Sep 17 00:00:00 2001 From: Petra Selmer Date: Wed, 20 Jul 2016 15:22:36 +0100 Subject: [PATCH 04/27] Added the notion of write subqueries, with `UNWIND` + `DO {...}` replacing `FOREACH` --- cip/CIP2016-06-22-nested-subqueries.adoc | 127 +++++++++++++++++++---- 1 file changed, 108 insertions(+), 19 deletions(-) diff --git a/cip/CIP2016-06-22-nested-subqueries.adoc b/cip/CIP2016-06-22-nested-subqueries.adoc index 686e99361b..40beaa5ef6 100644 --- a/cip/CIP2016-06-22-nested-subqueries.adoc +++ b/cip/CIP2016-06-22-nested-subqueries.adoc @@ -23,6 +23,7 @@ Subqueries - i.e. 
queries within queries - are a powerful and expressive feature * Better query construction and readability * Easier query composition and reuse * Post-processing as a single unit results from multiple queries + * Perform a sequence of multiple write commands for each record == Background @@ -31,41 +32,74 @@ This CIP may be viewed in conjunction with the EXISTS CIP and the Pattern Compre == Proposal -We propose the addition of new syntax to the `MATCH` clause for expressing nested subqueries. +This proposal suggests the introduction of two new subquery constructs to Cypher. + +**1. Read-only subqueries** + +We propose the addition of new syntax to the `MATCH` clause for expressing nested read-only subqueries. Nested subqueries are self-contained, read-only Cypher queries. -A nested subquery is denoted using the following syntax: `MATCH { }`. +A nested read-only subquery is denoted using the following syntax: `MATCH { }`. Nested subqueries may be correlated - i.e. the subquery has a dependency on the outer query - or uncorrelated. As this proposal extends the `MATCH` clause, nested subqueries can be contained within other nested subqueries at arbitrary depth. +**2. Write-only/read-write subqueries** + +We further propose the addition of a new syntax - the `DO` clause - for expressing nested write-only/read-write subqueries that _do not return any data_. + +A nested write-only/read-write subquery is denoted using the following syntax: `DO { }`. + +We additionally propose removing the `FOREACH` clause from the current language as it is rendered obsolete by the introduction of `DO`. + + === Syntax We extend the https://github.com/opencypher/openCypher/blob/master/grammar/cypher.xml[grammar] as follows: +**1. 
Read-only subqueries** + [source, ebnf] ---- -Match = [ "OPTIONAL" ], ( MatchPattern | NestedSubquery ), [ "WHERE", Predicate ] ; +Match = [ "OPTIONAL" ], ( MatchPattern | NestedReadOnlySubquery ), [ "WHERE", Predicate ] ; MatchPattern = "MATCH", Pattern ; -NestedSubquery = "MATCH", "{", RegularQuery, "}" ; +NestedReadOnlySubquery = "MATCH", "{", RegularQuery, "}" ; +---- + +**2. Write-only/read-write subqueries** + +[source, ebnf] +---- +Match = [ "OPTIONAL" ], "MATCH", Pattern, [ "WHERE", Predicate ], NestedWriteSubquery ; +NestedWriteSubquery = Unwind, "DO", "{", WriteSubquery, "}" ; +WriteSubquery = WriteOnlyClauseWithNoReturn, [ NestedWriteSubquery ] | + ReadWriteClauseWithNoReturn, [ NestedWriteSubquery ] ; ---- === Semantic clarification +**1. Read-only subqueries** + Conceptually, a nested subquery is evaluated for each incoming record and may produce an arbitrary number of result records. All incoming variables remain in scope. Any new variable bindings produced by evaluating the subquery will augment the variable bindings of the initial record; i.e. nested subqueries behave in the same way as `UNWIND` and `CALL` with regard to the introduction of new variable bindings. -Subqueries interact with write clauses in the same manner as `MATCH`. +Subqueries interact with write clauses in the same way as `MATCH` does. It is an error for a nested subquery to try to rebind (shadow) a pre-existing outer variable binding. +**2. Write-only/read-write subqueries** + +Execution of a `DO` subquery does not change the cardinality; i.e. the full subquery is run for each incoming record and then the record is being passed on to the remainder of the outer query. This is identical to way in which void procedures are executed. + === Examples +**1. 
Read-only subqueries** + Post-UNION processing: [source, cypher] ---- @@ -80,7 +114,7 @@ MATCH { (user)<-[:HAS_FAVOURITE]-(favorite:Favorite)-[:TARGETS]->(tweet:Tweet) RETURN tweet, favourite.time AS time, user.country AS country } -WHERE country = "se" +WHERE country = 'se' RETURN DISTINCT tweet ORDER BY time DESC LIMIT 10 @@ -89,13 +123,13 @@ LIMIT 10 Uncorrelated nested subquery: [source, cypher] ---- -MATCH (f:Farm {id: {farmId}) +MATCH (f:Farm {id: $farmId}) MATCH { - MATCH (u:User {id: {userId}})-[:LIKES]->(b:Brand), + MATCH (u:User {id: $userId})-[:LIKES]->(b:Brand), (b)-[:PRODUCES]->(p:Lawnmower) RETURN b.name AS name, p.code AS code UNION - MATCH (u:User {id: {userId}})-[:LIKES]->(b:Brand), + MATCH (u:User {id: $userId})-[:LIKES]->(b:Brand), (b)-[:PRODUCES]->(v:Vehicle), (v)<-[:IS_A]-(:Category {name: 'Tractor'}) RETURN b.name AS name, p.code AS code @@ -106,13 +140,13 @@ RETURN f, name, code Correlated nested subquery: [source, cypher] ---- -MATCH (f:Farm {id: {farmId})-[:IS_IN]->(country:Country) +MATCH (f:Farm {id: $farmId})-[:IS_IN]->(country:Country) MATCH { - MATCH (u:User {id: {userId}})-[:LIKES]->(b:Brand), + MATCH (u:User {id: $userId})-[:LIKES]->(b:Brand), (b)-[:PRODUCES]->(p:Lawnmower) RETURN b.name AS name, p.code AS code UNION - MATCH (u:User {id: {userId}})-[:LIKES]->(b:Brand), + MATCH (u:User {id: $userId})-[:LIKES]->(b:Brand), (b)-[:PRODUCES]->(v:Vehicle), (v)<-[:IS_A]-(:Category {name: 'Tractor'}) WHERE v.leftHandDrive = country.leftHandDrive @@ -125,13 +159,13 @@ Filtered and correlated nested subquery: [source, cypher] ---- MATCH (f:Farm)-[:IS_IN]->(country:Country) -WHERE country.name IN {countryNames} +WHERE country.name IN $countryNames MATCH { - MATCH (u:User {id: {userId}})-[:LIKES]->(b:Brand), + MATCH (u:User {id: $userId})-[:LIKES]->(b:Brand), (b)-[:PRODUCES]->(p:Lawnmower) RETURN b AS brand, p.code AS code UNION - MATCH (u:User {id: {userId}})-[:LIKES]->(b:Brand), + MATCH (u:User {id: $userId})-[:LIKES]->(b:Brand), 
(b)-[:PRODUCES]->(v:Vehicle), (v)<-[:IS_A]-(:Category {name: 'Tractor'}) WHERE v.leftHandDrive = country.leftHandDrive @@ -145,7 +179,7 @@ RETURN f, brand.name AS name, code Doubly-nested subquery: [source, cypher] ---- -MATCH (f:Farm {id: {farmId}}) +MATCH (f:Farm {id: $farmId}) MATCH { MATCH (c:Customer)-[:BUYS_FOOD_AT]->(f) MATCH { @@ -155,18 +189,73 @@ MATCH { MATCH (c)-[:LIKES]->(p:Posting)<-[:POSTED_BY]-(f) RETURN c, count(*) AS count } - RETURN c, "customer" AS type, sum(count) AS endorsement + RETURN c, 'customer' AS type, sum(count) AS endorsement UNION MATCH (s:Shop)-[:BUYS_FOOD_AT]->(f) MATCH (s)-[:PLACES]->(a:Advertisement)-[:ABOUT]->(f) - RETURN s, "shop" AS type, count(a) * 100 AS endorsement + RETURN s, 'shop' AS type, count(a) * 100 AS endorsement } RETURN f.name AS name, type, sum(endorsement) AS endorsement ---- +**2. Write-only/read-write subqueries** + +We illustrate these by means of an 'old' version of the query, in which `FOREACH` is used, followed by the 'new' version, using `DO`. 
+ +Using a single subquery - old version using `FOREACH`: +[source, cypher] +---- +MATCH (r:Root) +FOREACH(x IN range(1, 10) | + MERGE (c:Child {id: x}) + MERGE (r)-[:PARENT]->(c) +) +---- + +Using a single subquery - new version using `DO`: +[source, cypher] +---- +MATCH (r:Root) +UNWIND range(1, 10) AS x +DO { + MERGE (c:Child {id: x}) + MERGE (r)-[:PARENT]->(c) +} +---- + +Doubly-nested subquery - old version using `FOREACH`: +[source, cypher] +---- +MATCH (r:Root) +FOREACH (x IN range(1, 10) | + CREATE (r)-[:PARENT]->(c:Child {id: x}) + MERGE (r)-[:PUBLISHES]->(t:Topic {id: r.id + x}) + FOREACH (y IN range(1, 10) | + CREATE (c)-[p:PARENT]->(:Child {id: c.id * 10 + y}) + SET p.id = c.id * 5 + y + ) +) +---- + +Doubly-nested subquery - new version using `DO`: +[source, cypher] +---- +MATCH (r:Root) +UNWIND range(1, 10) AS x +DO { + CREATE (r)-[:PARENT]->(c:Child {id: x}) + MERGE (r)-[:PUBLISHES]->(t:Topic {id: r.id + x}) + UNWIND range(1, 10) AS y + DO { + CREATE (c)-[p:PARENT]->(:Child {id: c.id * 10 + y}) + SET p.id = c.id * 5 + y + } +} +---- + === Interaction with existing features -Nested subqueries do not interact directly with any existing features. +Apart from the suggested deprecation of the `FOREACH` clause, nested read-only, write-only and read-write subqueries do not interact directly with any existing features. 
=== Alternatives From ede133473ce12c85ec28923fbb0a5298358fe868 Mon Sep 17 00:00:00 2001 From: Petra Selmer Date: Wed, 20 Jul 2016 15:48:25 +0100 Subject: [PATCH 05/27] Clarified the way in which variable bindings work (based on comments received) --- cip/CIP2016-06-22-nested-subqueries.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cip/CIP2016-06-22-nested-subqueries.adoc b/cip/CIP2016-06-22-nested-subqueries.adoc index 40beaa5ef6..c4cc189faa 100644 --- a/cip/CIP2016-06-22-nested-subqueries.adoc +++ b/cip/CIP2016-06-22-nested-subqueries.adoc @@ -86,7 +86,7 @@ Conceptually, a nested subquery is evaluated for each incoming record and may pr All incoming variables remain in scope. -Any new variable bindings produced by evaluating the subquery will augment the variable bindings of the initial record; i.e. nested subqueries behave in the same way as `UNWIND` and `CALL` with regard to the introduction of new variable bindings. +Any new variable bindings introduced by the final `RETURN` clause when evaluating the subquery will augment the variable bindings of the initial record. Therefore, nested subqueries cannot shadow variables present in the outer scope, and thus behave in the same way as `UNWIND` and `CALL` with regard to the introduction of new variable bindings. Subqueries interact with write clauses in the same way as `MATCH` does. From a1c6442fed02dcbc62f023a000c0eba02e397118 Mon Sep 17 00:00:00 2001 From: Stefan Plantikow Date: Mon, 26 Sep 2016 12:39:43 +0200 Subject: [PATCH 06/27] Addressed some feedback --- cip/CIP2016-06-22-nested-subqueries.adoc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cip/CIP2016-06-22-nested-subqueries.adoc b/cip/CIP2016-06-22-nested-subqueries.adoc index c4cc189faa..8ae9f6f03f 100644 --- a/cip/CIP2016-06-22-nested-subqueries.adoc +++ b/cip/CIP2016-06-22-nested-subqueries.adoc @@ -27,7 +27,7 @@ Subqueries - i.e. 
queries within queries - are a powerful and expressive feature == Background -This CIP may be viewed in conjunction with the EXISTS CIP and the Pattern Comprehension CIP, both of which propose variants of subqueries. +This CIP may be viewed in the light of the EXISTS CIP and forthcoming the Pattern Comprehension CIP, both of which propose variants of subqueries. == Proposal @@ -86,7 +86,7 @@ Conceptually, a nested subquery is evaluated for each incoming record and may pr All incoming variables remain in scope. -Any new variable bindings introduced by the final `RETURN` clause when evaluating the subquery will augment the variable bindings of the initial record. Therefore, nested subqueries cannot shadow variables present in the outer scope, and thus behave in the same way as `UNWIND` and `CALL` with regard to the introduction of new variable bindings. +Any new variable bindings introduced by the final `RETURN` clause when evaluating the subquery will augment the variable bindings of the initial record. Therefore, nested subqueries cannot shadow variables present in the outer scope, and thus behave in the same way as `UNWIND` and `CALL` with regard to the introduction of new variable bindings. Any other variable bindings introduced in the subquery will not be visible to the outer scope. Subqueries interact with write clauses in the same way as `MATCH` does. @@ -94,7 +94,7 @@ It is an error for a nested subquery to try to rebind (shadow) a pre-existing ou **2. Write-only/read-write subqueries** -Execution of a `DO` subquery does not change the cardinality; i.e. the full subquery is run for each incoming record and then the record is being passed on to the remainder of the outer query. This is identical to way in which void procedures are executed. +Execution of a `DO` subquery does not change the cardinality; i.e. the full subquery is run for each incoming record and then the record is being passed on to the remainder of the outer query. 
=== Examples @@ -314,7 +314,7 @@ Both scalar and table expression subqueries are out of scope for the purposes of === SPARQL -https://www.w3.org/TR/2013/REC-sparql11-query-20130321/#subqueries[SPARQL] only supports uncorrelated subqueries, exemplified by: +https://www.w3.org/TR/2013/REC-sparql11-query-20130321/#subqueries[SPARQL] only supports uncorrelated subqueries in the standard, exemplified by: [source, cypher] ---- From 4caeb54817e1378725f02828b758f8eb7c760217 Mon Sep 17 00:00:00 2001 From: Petra Selmer Date: Thu, 17 Nov 2016 11:44:13 +0000 Subject: [PATCH 07/27] Addressing comments; making clarifications --- cip/CIP2016-06-22-nested-subqueries.adoc | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/cip/CIP2016-06-22-nested-subqueries.adoc b/cip/CIP2016-06-22-nested-subqueries.adoc index 8ae9f6f03f..2933ca5618 100644 --- a/cip/CIP2016-06-22-nested-subqueries.adoc +++ b/cip/CIP2016-06-22-nested-subqueries.adoc @@ -27,8 +27,7 @@ Subqueries - i.e. queries within queries - are a powerful and expressive feature == Background -This CIP may be viewed in the light of the EXISTS CIP and forthcoming the Pattern Comprehension CIP, both of which propose variants of subqueries. - +This CIP may be viewed in light of the EXISTS CIP and the forthcoming Pattern Comprehension CIP, both of which propose variants of subqueries. == Proposal @@ -94,7 +93,9 @@ It is an error for a nested subquery to try to rebind (shadow) a pre-existing ou **2. Write-only/read-write subqueries** -Execution of a `DO` subquery does not change the cardinality; i.e. the full subquery is run for each incoming record and then the record is being passed on to the remainder of the outer query. +Execution of a `DO` subquery does not change the cardinality; i.e. the full subquery is run for each incoming record and then the record is passed on to the remainder of the outer query. 
+ +A query may end with a `DO` subquery in the same way that a query can currently end with any update clause. === Examples @@ -223,6 +224,13 @@ DO { } ---- +Note how `FOREACH` is addressing two semantic concerns simultaneously; namely looping, and performing updates without affecting the cardinality of the outer query. +In the new version of the query shown above, these orthogonal concerns have been separated. +Looping is already handled by `UNWIND`, while `DO` suppresses the increased cardinality from the inner query. + +`DO` also hides all new variable bindings introduced by the inner query from the outer query. +If `DO` is omitted from the new version of the query shown above, the variable `c` would become visible to the remainder of the query. + Doubly-nested subquery - old version using `FOREACH`: [source, cypher] ---- @@ -314,7 +322,7 @@ Both scalar and table expression subqueries are out of scope for the purposes of === SPARQL -https://www.w3.org/TR/2013/REC-sparql11-query-20130321/#subqueries[SPARQL] only supports uncorrelated subqueries in the standard, exemplified by: +https://www.w3.org/TR/2013/REC-sparql11-query-20130321/#subqueries[SPARQL] supports uncorrelated subqueries in the standard, exemplified by: [source, cypher] ---- @@ -330,9 +338,8 @@ WHERE { } ---- -Owing to the bottom-up nature of SPARQL query evaluation, the subqueries are evaluated logically first, and the results are projected up to the outer query. - -Only variables projected out of the subquery will be visible, or in scope, to the outer query. +Owing to the bottom-up nature of SPARQL query evaluation, the supported forms of subqueries are evaluated logically first, and the results are projected up to the outer query. +Variables projected out of the subquery will be visible, or in scope, to the outer query. 
== Benefits to this proposal From 5b5b9ccbd0731248650f90793b6259418359b4d5 Mon Sep 17 00:00:00 2001 From: Stefan Plantikow Date: Mon, 27 Mar 2017 14:48:10 +0200 Subject: [PATCH 08/27] Sketched out additional forms of nested subqueries. --- cip/CIP2016-06-22-nested-subqueries.adoc | 38 ++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/cip/CIP2016-06-22-nested-subqueries.adoc b/cip/CIP2016-06-22-nested-subqueries.adoc index 2933ca5618..1e0febe44f 100644 --- a/cip/CIP2016-06-22-nested-subqueries.adoc +++ b/cip/CIP2016-06-22-nested-subqueries.adoc @@ -33,7 +33,7 @@ This CIP may be viewed in light of the EXISTS CIP and the forthcoming Pattern Co This proposal suggests the introduction of two new subquery constructs to Cypher. -**1. Read-only subqueries** +**1. Read-only match subqueries** We propose the addition of new syntax to the `MATCH` clause for expressing nested read-only subqueries. @@ -45,7 +45,23 @@ Nested subqueries may be correlated - i.e. the subquery has a dependency on the As this proposal extends the `MATCH` clause, nested subqueries can be contained within other nested subqueries at arbitrary depth. -**2. Write-only/read-write subqueries** +**2. Read-only optional match subqueries** + +We propose the addition of new, abbreviated syntax for expressing nested read-only optional match subqueries. + +A nested read-only optional match subquery takes the form: `OPTIONAL { }`. + +Nested optional match subqueries may be correlated - i.e. the subquery has a dependency on the outer query - or uncorrelated. + +**3. Read-only mandatory match subqueries** + +We propose the addition of new, abbreviated syntax for expressing nested read-only mandatory match subqueries. + +A nested read-only mandatory match subquery takes the form: `MANDATORY { }`. + +Nested mandatory match subqueries may be correlated - i.e. the subquery has a dependency on the outer query - or uncorrelated. + +**4. 
Write-only/read-write subqueries** We further propose the addition of a new syntax - the `DO` clause - for expressing nested write-only/read-write subqueries that _do not return any data_. @@ -53,8 +69,24 @@ A nested write-only/read-write subquery is denoted using the following syntax: ` We additionally propose removing the `FOREACH` clause from the current language as it is rendered obsolete by the introduction of `DO`. +=== Read-only subquery syntax + +All kinds of read-only subqueries support the following syntactical forms: + +* `KEYWORD { }` +* `KEYWORD { [WHERE ] }` which is syntactic sugar for `KEYWORD { MATCH [WHERE ] WITH * }` +* `KEYWORD MATCH [WHERE ]` which is syntactic sugar for `KEYWORD { MATCH [WHERE ] RETURN * }` + +Here keyword is + +* `OPTIONAL` for read-only optional match subqueries +* `MANDATORY` for read-only mandatory match subqueries +* `MATCH` for read-only match subqueries except for the last form which is just written as `MATCH [WHERE ]` + +All read-only subqueries may end in `RETURN`. +If they do not end with a `RETURN` clause, `RETURN *` is added implicitly. 
-=== Syntax +=== Grammar changes We extend the https://github.com/opencypher/openCypher/blob/master/grammar/cypher.xml[grammar] as follows: From fe214758fa27958144112bef41eeb995c43332a1 Mon Sep 17 00:00:00 2001 From: Stefan Plantikow Date: Thu, 30 Mar 2017 17:08:00 +0200 Subject: [PATCH 09/27] Homogeneous syntax for OPTIONAL, MANDATORY, MATCH, DO WHEN --- cip/CIP2016-06-22-nested-subqueries.adoc | 119 +++++++++++------------ 1 file changed, 58 insertions(+), 61 deletions(-) diff --git a/cip/CIP2016-06-22-nested-subqueries.adoc b/cip/CIP2016-06-22-nested-subqueries.adoc index 1e0febe44f..3395b2accb 100644 --- a/cip/CIP2016-06-22-nested-subqueries.adoc +++ b/cip/CIP2016-06-22-nested-subqueries.adoc @@ -31,107 +31,92 @@ This CIP may be viewed in light of the EXISTS CIP and the forthcoming Pattern Co == Proposal -This proposal suggests the introduction of two new subquery constructs to Cypher. +Nested subqueries are self-contained Cypher queries that are run within the scope of an outer Cypher query. -**1. Read-only match subqueries** +This proposal suggests the introduction of new nested subquery constructs to Cypher. -We propose the addition of new syntax to the `MATCH` clause for expressing nested read-only subqueries. +* Read-only nested match subqueries of the form `MATCH { [(a)->(b) [WHERE ...]] ... RETURN * }` +* Read-only nested optional match subqueries of the form `OPTIONAL { [(a)->(b) [WHERE ...]] ... RETURN * }` +* Read-only nested mandatory match subqueries of the form `MANDATORY { [(a)->(b) [WHERE ...]] ... [RETURN *] }` +* Read/Write nested subqueries of the form `DO WHEN ... { ... }` (not ending with `RETURN`) -Nested subqueries are self-contained, read-only Cypher queries. +All forms are introduced with a keyword in conjunction with optional subclauses which are then followed by an inner query in curly braces. -A nested read-only subquery is denoted using the following syntax: `MATCH { }`. +Nested subqueries may be correlated - i.e. 
the inner query may use variables from the outer query - or uncorrelated. -Nested subqueries may be correlated - i.e. the subquery has a dependency on the outer query - or uncorrelated. +Nested subqueries can be contained within other nested subqueries at an arbitrary (but finite) depth. -As this proposal extends the `MATCH` clause, nested subqueries can be contained within other nested subqueries at arbitrary depth. -**2. Read-only optional match subqueries** +**1. Read-only nested match subqueries** -We propose the addition of new, abbreviated syntax for expressing nested read-only optional match subqueries. +We propose the addition of new syntax to the `MATCH` clause for expressing read-only nested match subqueries. -A nested read-only optional match subquery takes the form: `OPTIONAL { }`. +A nested read-only match subquery is denoted using the following syntax: `MATCH { }`. -Nested optional match subqueries may be correlated - i.e. the subquery has a dependency on the outer query - or uncorrelated. +The inner match query is a full read-only Cypher query. -**3. Read-only mandatory match subqueries** +Moreover, any valid read-only Cypher query from which the leading `MATCH` keyword has been omitted may also be used as an inner match query. -We propose the addition of new, abbreviated syntax for expressing nested read-only mandatory match subqueries. +This rule only applies if the leading `MATCH` clause is the root clause of the inner query (i.e. is not the first clause inside a nested query or a `UNION`). -A nested read-only mandatory match subquery takes the form: `MANDATORY { }`. -Nested mandatory match subqueries may be correlated - i.e. the subquery has a dependency on the outer query - or uncorrelated. +**2. Read-only nested optional match subqueries** -**4. Write-only/read-write subqueries** +We propose the addition of a new `OPTIONAL` clause for expressing read-only nested optional match subqueries. 
-We further propose the addition of a new syntax - the `DO` clause - for expressing nested write-only/read-write subqueries that _do not return any data_. +A nested read-only optional match subquery is denoted using the following syntax: `OPTIONAL { }`. -A nested write-only/read-write subquery is denoted using the following syntax: `DO { }`. -We additionally propose removing the `FOREACH` clause from the current language as it is rendered obsolete by the introduction of `DO`. +**3. Read-only nested mandatory match subqueries** -=== Read-only subquery syntax +We propose the addition of a new `MANDATORY` clause for expressing read-only nested mandatory match subqueries. -All kinds of read-only subqueries support the following syntactical forms: +A nested read-only mandatory match subquery is denoted using the following syntax: `MANDATORY { }`. -* `KEYWORD { }` -* `KEYWORD { [WHERE ] }` which is syntactic sugar for `KEYWORD { MATCH [WHERE ] WITH * }` -* `KEYWORD MATCH [WHERE ]` which is syntactic sugar for `KEYWORD { MATCH [WHERE ] RETURN * }` +The inner mandatory query is any inner match query. -Here keyword is +Moreover, any inner match query from which the trailing final `RETURN` clause has been omitted may also be used as an inner mandatory query. -* `OPTIONAL` for read-only optional match subqueries -* `MANDATORY` for read-only mandatory match subqueries -* `MATCH` for read-only match subqueries except for the last form which is just written as `MATCH [WHERE ]` -All read-only subqueries may end in `RETURN`. -If they do not end with a `RETURN` clause, `RETURN *` is added implicitly. +**4. Read/Write nested subqueries** -=== Grammar changes +We propose the addition of a new `DO` clause for expressing read/write nested subqueries that _do not return any data_. -We extend the https://github.com/opencypher/openCypher/blob/master/grammar/cypher.xml[grammar] as follows: +A nested read/write subquery is denoted using the following syntax: `DO [WHEN predicate] { }`. -**1. 
Read-only subqueries** +Any updating Cypher query from which the trailing final `RETURN` clause has been omitted may be used as an inner update query. -[source, ebnf] ----- -Match = [ "OPTIONAL" ], ( MatchPattern | NestedReadOnlySubquery ), [ "WHERE", Predicate ] ; -MatchPattern = "MATCH", Pattern ; -NestedReadOnlySubquery = "MATCH", "{", RegularQuery, "}" ; ----- - -**2. Write-only/read-write subqueries** +Using a +We additionally propose removing the `FOREACH` clause from the current language as it is rendered obsolete by the introduction of `DO`. -[source, ebnf] ----- -Match = [ "OPTIONAL" ], "MATCH", Pattern, [ "WHERE", Predicate ], NestedWriteSubquery ; -NestedWriteSubquery = Unwind, "DO", "{", WriteSubquery, "}" ; -WriteSubquery = WriteOnlyClauseWithNoReturn, [ NestedWriteSubquery ] | - ReadWriteClauseWithNoReturn, [ NestedWriteSubquery ] ; ----- === Semantic clarification -**1. Read-only subqueries** +**1. Read-only nested subqueries** Conceptually, a nested subquery is evaluated for each incoming record and may produce an arbitrary number of result records. All incoming variables remain in scope. - -Any new variable bindings introduced by the final `RETURN` clause when evaluating the subquery will augment the variable bindings of the initial record. Therefore, nested subqueries cannot shadow variables present in the outer scope, and thus behave in the same way as `UNWIND` and `CALL` with regard to the introduction of new variable bindings. Any other variable bindings introduced in the subquery will not be visible to the outer scope. +Any new variable bindings introduced by the final `RETURN` clause when evaluating the subquery will augment the variable bindings of the initial record. +Therefore, nested subqueries cannot shadow variables present in the outer scope, and thus behave in the same way as `UNWIND` and `CALL` with regard to the introduction of new variable bindings. 
+Any other variable bindings that are introduced temporarily in the subquery will not be visible to the outer scope. Subqueries interact with write clauses in the same way as `MATCH` does. -It is an error for a nested subquery to try to rebind (shadow) a pre-existing outer variable binding. -**2. Write-only/read-write subqueries** +**2. Read/Write subqueries** + +Execution of a `DO` subquery does not change the cardinality; i.e. the inner update query is run for each incoming record (optionally filtered by the given predicate if a `WHEN` sub-clause is present). + +Any input record is always passed on to the clause succeeding the `DO` subquery, irrespective of whether it was eligible for processing by the inner update query. -Execution of a `DO` subquery does not change the cardinality; i.e. the full subquery is run for each incoming record and then the record is passed on to the remainder of the outer query. +A `DO` clause that uses a `WHEN` sub-clause is called a _conditional DO_. A query may end with a `DO` subquery in the same way that a query can currently end with any update clause. === Examples -**1. Read-only subqueries** +**1. 
Read-only nested match subqueries** Post-UNION processing: [source, cypher] @@ -153,7 +138,7 @@ ORDER BY time DESC LIMIT 10 ---- -Uncorrelated nested subquery: +Uncorrelated nested match subquery: [source, cypher] ---- MATCH (f:Farm {id: $farmId}) @@ -170,7 +155,7 @@ MATCH { RETURN f, name, code ---- -Correlated nested subquery: +Correlated nested match subquery: [source, cypher] ---- MATCH (f:Farm {id: $farmId})-[:IS_IN]->(country:Country) @@ -188,7 +173,7 @@ MATCH { RETURN f, name, code ---- -Filtered and correlated nested subquery: +Filtered and correlated nested match subquery: [source, cypher] ---- MATCH (f:Farm)-[:IS_IN]->(country:Country) @@ -209,12 +194,12 @@ WHERE f.type = 'organic' RETURN f, brand.name AS name, code ---- -Doubly-nested subquery: +Doubly-nested match subquery: [source, cypher] ---- MATCH (f:Farm {id: $farmId}) MATCH { - MATCH (c:Customer)-[:BUYS_FOOD_AT]->(f) + (c:Customer)-[:BUYS_FOOD_AT]->(f) MATCH { MATCH (c)-[:RETWEETS]->(t:Tweet)<-[:TWEETED_BY]-(f) RETURN c, count(*) AS count @@ -231,7 +216,7 @@ MATCH { RETURN f.name AS name, type, sum(endorsement) AS endorsement ---- -**2. Write-only/read-write subqueries** +**2. Read/Write nested subqueries** We illustrate these by means of an 'old' version of the query, in which `FOREACH` is used, followed by the 'new' version, using `DO`. @@ -293,6 +278,18 @@ DO { } ---- +Conditional `DO` +[source, cypher] +---- +MATCH (r:Root) +UNWIND range(1, 10) AS x +DO WHEN x % 2 = 1 { + MERGE (c:Odd:Child {id: x}) + MERGE (r)-[:PARENT]->(c) +} +---- + + === Interaction with existing features Apart from the suggested deprecation of the `FOREACH` clause, nested read-only, write-only and read-write subqueries do not interact directly with any existing features. 
From 80a1ce44451ca2719ea51dd9bf4151c5ef22a4b4 Mon Sep 17 00:00:00 2001 From: Stefan Plantikow Date: Thu, 13 Apr 2017 11:46:49 +0200 Subject: [PATCH 10/27] Address feedback and introduce new syntactic short forms --- cip/CIP2016-06-22-nested-subqueries.adoc | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/cip/CIP2016-06-22-nested-subqueries.adoc b/cip/CIP2016-06-22-nested-subqueries.adoc index 3395b2accb..dbcb06ffd0 100644 --- a/cip/CIP2016-06-22-nested-subqueries.adoc +++ b/cip/CIP2016-06-22-nested-subqueries.adoc @@ -28,6 +28,7 @@ Subqueries - i.e. queries within queries - are a powerful and expressive feature == Background This CIP may be viewed in light of the EXISTS CIP and the forthcoming Pattern Comprehension CIP, both of which propose variants of subqueries. +In contrast, this CIP focusses on subqueries operating at a clause level while both the EXISTS CIP and the forthcoming Pattern Comprehension CIP propose subqueries operating at an expression level. == Proposal @@ -37,7 +38,7 @@ This proposal suggests the introduction of new nested subquery constructs to Cyp * Read-only nested match subqueries of the form `MATCH { [(a)->(b) [WHERE ...]] ... RETURN * }` * Read-only nested optional match subqueries of the form `OPTIONAL { [(a)->(b) [WHERE ...]] ... RETURN * }` -* Read-only nested mandatory match subqueries of the form `MANDATORY { [(a)->(b) [WHERE ...]] ... [RETURN *] }` +* Read-only nested mandatory match subqueries of the form `MANDATORY { [(a)->(b) [WHERE ...]] ... RETURN * }` * Read/Write nested subqueries of the form `DO WHEN ... { ... }` (not ending with `RETURN`) All forms are introduced with a keyword in conjunction with optional subclauses which are then followed by an inner query in curly braces. @@ -46,6 +47,8 @@ Nested subqueries may be correlated - i.e. the inner query may use variables fro Nested subqueries can be contained within other nested subqueries at an arbitrary (but finite) depth. 
+Finally, this CIP proposes new shorthand syntax for non-subquery optional match and mandatory match clauses as well as the ability to return no variables through the introduction of `RETURN -`. + **1. Read-only nested match subqueries** @@ -57,8 +60,6 @@ The inner match query is a full read-only Cypher query. Moreover, any valid read-only Cypher query from which the leading `MATCH` keyword has been omitted may also be used as an inner match query. -This rule only applies if the leading `MATCH` clause is the root clause of the inner query (i.e. is not the first clause inside a nested query or a `UNION`). - **2. Read-only nested optional match subqueries** @@ -71,11 +72,7 @@ A nested read-only optional match subquery is denoted using the following syntax We propose the addition of a new `MANDATORY` clause for expressing read-only nested mandatory match subqueries. -A nested read-only mandatory match subquery is denoted using the following syntax: `MANDATORY { }`. - -The inner mandatory query is any inner match query. - -Moreover, any inner match query from which the trailing final `RETURN` clause has been omitted may also be used as an inner mandatory query. +A nested read-only mandatory match subquery is denoted using the following syntax: `MANDATORY { }`. **4. Read/Write nested subqueries** @@ -86,10 +83,18 @@ A nested read/write subquery is denoted using the following syntax: `DO [WHEN pr Any updating Cypher query from which the trailing final `RETURN` clause has been omitted may be used as an inner update query. -Using a We additionally propose removing the `FOREACH` clause from the current language as it is rendered obsolete by the introduction of `DO`. +**5. Shorthand syntax** + +We propose the addition of new shorthand syntax for harmonizing the integration of subqueries into the language: + +* A query of the form `OPTIONAL ` shall be semantically equivalent to `OPTIONAL MATCH `. 
+* A query of the form `MANDATORY ` shall be semantically equivalent to `MANDATORY MATCH `. +* A projection clause of the form `RETURN -` will retain the input cardinality but project no result fields, i.e. this allows for *only* checking the cardinality in a mandatory match subquery. + + === Semantic clarification **1. Read-only nested subqueries** From b8f49d6153633aaa1ebb15da4c39709d531c8aba Mon Sep 17 00:00:00 2001 From: Stefan Plantikow Date: Wed, 19 Apr 2017 20:29:12 +0200 Subject: [PATCH 11/27] Add chained subqueries with `THEN` and overhaul document --- cip/CIP2016-06-22-nested-subqueries.adoc | 112 ++++++++++++++++------- 1 file changed, 77 insertions(+), 35 deletions(-) diff --git a/cip/CIP2016-06-22-nested-subqueries.adoc b/cip/CIP2016-06-22-nested-subqueries.adoc index dbcb06ffd0..c80f8163d0 100644 --- a/cip/CIP2016-06-22-nested-subqueries.adoc +++ b/cip/CIP2016-06-22-nested-subqueries.adoc @@ -32,54 +32,73 @@ In contrast, this CIP focusses on subqueries operating at a clause level while b == Proposal -Nested subqueries are self-contained Cypher queries that are run within the scope of an outer Cypher query. +Nested subqueries are self-contained Cypher queries that are usually run within the scope of an outer Cypher query. This proposal suggests the introduction of new nested subquery constructs to Cypher. -* Read-only nested match subqueries of the form `MATCH { [(a)->(b) [WHERE ...]] ... RETURN * }` -* Read-only nested optional match subqueries of the form `OPTIONAL { [(a)->(b) [WHERE ...]] ... RETURN * }` -* Read-only nested mandatory match subqueries of the form `MANDATORY { [(a)->(b) [WHERE ...]] ... RETURN * }` +* Read-only nested major subqueries of the form `{ ... RETURN ... }` +* Read-only nested chained subqueries of the form `THEN { ... RETURN ... }` +* Read-only nested optional subqueries of the form `OPTIONAL { ... RETURN ... }` +* Read-only nested mandatory subqueries of the form `MANDATORY { ... RETURN ... 
}` * Read/Write nested subqueries of the form `DO WHEN ... { ... }` (not ending with `RETURN`) -All forms are introduced with a keyword in conjunction with optional subclauses which are then followed by an inner query in curly braces. +A nested major subquery just consists of an inner query in curly braces. + +All nested non-major subqueries are introduced with a keyword in conjunction with an inner query in curly braces. Nested subqueries may be correlated - i.e. the inner query may use variables from the outer query - or uncorrelated. Nested subqueries can be contained within other nested subqueries at an arbitrary (but finite) depth. -Finally, this CIP proposes new shorthand syntax for non-subquery optional match and mandatory match clauses as well as the ability to return no variables through the introduction of `RETURN -`. +Finally, this CIP proposes new shorthand syntax for returning no fields through the introduction of `WITH -`, `RETURN -`, and `YIELD -`. + + +**1. Read-only nested major subqueries** + +We propose the addition of read-only nested major subqueries as a new form of read-only Cypher query. + +A nested read-only major subquery is denoted using the following syntax: `{ }`. +The inner query could be any complete read-only Cypher query. -**1. Read-only nested match subqueries** +A nested read-only major subquery may only be used as a primary clause, i.e. as a -We propose the addition of new syntax to the `MATCH` clause for expressing read-only nested match subqueries. +* top-level Cypher query, +* inner query of another nested subquery, +* inner query of another expression-level subquery (like a pattern comprehension, or an `EXISTS` subquery), +* argument query to `UNION` and similar clause-level binary operators -A nested read-only match subquery is denoted using the following syntax: `MATCH { }`. 
+A nested read-only major subquery may not be used as a secondary clause after a preceding primary clause +(However, a nested read-only chained subquery may be used in this case). -The inner match query is a full read-only Cypher query. -Moreover, any valid read-only Cypher query from which the leading `MATCH` keyword has been omitted may also be used as an inner match query. +**2. Read-only nested chained subqueries** +We propose the addition of read-only nested chained subqueries for using nested subqueries as a secondary clause. +This is called _subquery chaining_. -**2. Read-only nested optional match subqueries** +After a chain of clauses, a new nested chained subquery may be introduced as a secondary clause using the `THEN` keyword followed by an inner query in curly braces, i.e. it is denoted using the following syntax: `... THEN { }`. -We propose the addition of a new `OPTIONAL` clause for expressing read-only nested optional match subqueries. -A nested read-only optional match subquery is denoted using the following syntax: `OPTIONAL { }`. +**3. Read-only nested optional subqueries** +We propose the addition of a new `OPTIONAL` clause for expressing read-only nested optional subqueries. -**3. Read-only nested mandatory match subqueries** +A read-only nested optional subquery is denoted using the following syntax: `OPTIONAL { }`. -We propose the addition of a new `MANDATORY` clause for expressing read-only nested mandatory match subqueries. -A nested read-only mandatory match subquery is denoted using the following syntax: `MANDATORY { }`. +**4. Read-only nested mandatory subqueries** + +We propose the addition of a new `MANDATORY` clause for expressing read-only nested mandatory subqueries. + +A read-only nested mandatory subquery is denoted using the following syntax: `MANDATORY { }`. **4. Read/Write nested subqueries** We propose the addition of a new `DO` clause for expressing read/write nested subqueries that _do no return any data_. 
-A nested read/write subquery is denoted using the following syntax: `DO [WHEN predicate] { }`. +A read/write nested subquery is denoted using the following syntax: `DO [WHEN predicate] { }`. Any updating Cypher query from which the trailing final `RETURN` clause has been omitted may be used as an inner update query. @@ -88,11 +107,9 @@ We additionally propose removing the `FOREACH` clause from the current language **5. Shorthand syntax** -We propose the addition of new shorthand syntax for harmonizing the integration of subqueries into the language: +We propose the addition of new projection clauses of the form `WITH -` and `RETURN -` which will retain the input cardinality but project no result fields, i.e. this allows for *only* checking the cardinality in a read-only nested mandatory subquery. -* A query of the form `OPTIONAL ` shall be semantically equivalent to `OPTIONAL MATCH `. -* A query of the form `MANDATORY ` shall be semantically equivalent to `MANDATORY MATCH `. -* A projection clause of the form `RETURN -` will retain the input cardinality but project no result fields, i.e. this allows for *only* checking the cardinality in a mandatory match subquery. +We propose the addition of a new subclause to `CALL` of the form `YIELD -` which will retain the output cardinality of a call but project no result fields, i.e. this allows for *only* checking the cardinality in an `EXISTS` subquery. === Semantic clarification @@ -121,12 +138,12 @@ A query may end with a `DO` subquery in the same way that a query can currently === Examples -**1. Read-only nested match subqueries** +**1. 
Read-only nested subqueries** Post-UNION processing: [source, cypher] ---- -MATCH { +{ // authored tweets MATCH (me:User {name: 'Alice'})-[:FOLLOWS]->(user:User), (user)<-[:AUTHORED]-(tweet:Tweet) @@ -143,11 +160,11 @@ ORDER BY time DESC LIMIT 10 ---- -Uncorrelated nested match subquery: +Uncorrelated nested subquery: [source, cypher] ---- MATCH (f:Farm {id: $farmId}) -MATCH { +THEN { MATCH (u:User {id: $userId})-[:LIKES]->(b:Brand), (b)-[:PRODUCES]->(p:Lawnmower) RETURN b.name AS name, p.code AS code @@ -160,11 +177,11 @@ MATCH { RETURN f, name, code ---- -Correlated nested match subquery: +Correlated nested subquery: [source, cypher] ---- MATCH (f:Farm {id: $farmId})-[:IS_IN]->(country:Country) -MATCH { +THEN { MATCH (u:User {id: $userId})-[:LIKES]->(b:Brand), (b)-[:PRODUCES]->(p:Lawnmower) RETURN b.name AS name, p.code AS code @@ -178,12 +195,12 @@ MATCH { RETURN f, name, code ---- -Filtered and correlated nested match subquery: +Filtered and correlated nested subquery: [source, cypher] ---- MATCH (f:Farm)-[:IS_IN]->(country:Country) WHERE country.name IN $countryNames -MATCH { +THEN { MATCH (u:User {id: $userId})-[:LIKES]->(b:Brand), (b)-[:PRODUCES]->(p:Lawnmower) RETURN b AS brand, p.code AS code @@ -199,13 +216,13 @@ WHERE f.type = 'organic' RETURN f, brand.name AS name, code ---- -Doubly-nested match subquery: +Doubly-nested subquery: [source, cypher] ---- MATCH (f:Farm {id: $farmId}) -MATCH { - (c:Customer)-[:BUYS_FOOD_AT]->(f) - MATCH { +THEN { + MATCH (c:Customer)-[:BUYS_FOOD_AT]->(f) + THEN { MATCH (c)-[:RETWEETS]->(t:Tweet)<-[:TWEETED_BY]-(f) RETURN c, count(*) AS count UNION @@ -221,7 +238,32 @@ MATCH { RETURN f.name AS name, type, sum(endorsement) AS endorsement ---- -**2. Read/Write nested subqueries** +**2. 
Read-only nested optional match and mandatory subqueries** + +This proposal also provides nested subquery forms of `OPTIONAL MATCH` and `MANDATORY MATCH`: + +[source, cypher] +---- +MANDATORY MATCH (p:Person {name: 'Petra'}) +MANDATORY { + MATCH (p)-[:ATTENDS]->(conf:Conference {name: $conf}) + RETURN conf + UNION + MATCH (p)-[:LIVES_IN]->(:City)<-[:IN]-(conf:Conference {name: $conf}) + RETURN conf +} +OPTIONAL { + MATCH (p)-[:KNOWS]->(a:Attendee)-[:PUBLISHED_AT]->(conf) + RETURN a.name AS name + UNION + MATCH (p)-[:KNOWS]->(a:Attendee)-[:PRESENTED_AT]->(conf) + RETURN a.name AS name +} +RETURN name +---- + + +**3. Read/Write nested subqueries** We illustrate these by means of an 'old' version of the query, in which `FOREACH` is used, followed by the 'new' version, using `DO`. From bf532520fb3207a248b7040e145ab727703c2050 Mon Sep 17 00:00:00 2001 From: Stefan Plantikow Date: Thu, 20 Apr 2017 22:25:31 +0200 Subject: [PATCH 12/27] Reflect discussion; add new conditional form of DO and WHERE shorthand --- cip/CIP2016-06-22-nested-subqueries.adoc | 76 ++++++++++++++++-------- 1 file changed, 50 insertions(+), 26 deletions(-) diff --git a/cip/CIP2016-06-22-nested-subqueries.adoc b/cip/CIP2016-06-22-nested-subqueries.adoc index c80f8163d0..c252d80986 100644 --- a/cip/CIP2016-06-22-nested-subqueries.adoc +++ b/cip/CIP2016-06-22-nested-subqueries.adoc @@ -36,39 +36,42 @@ Nested subqueries are self-contained Cypher queries that are usually run within This proposal suggests the introduction of new nested subquery constructs to Cypher. -* Read-only nested major subqueries of the form `{ ... RETURN ... }` +* Read-only nested simple subqueries of the form `{ ... RETURN ... }` * Read-only nested chained subqueries of the form `THEN { ... RETURN ... }` * Read-only nested optional subqueries of the form `OPTIONAL { ... RETURN ... }` * Read-only nested mandatory subqueries of the form `MANDATORY { ... RETURN ... }` -* Read/Write nested subqueries of the form `DO WHEN ... { ... 
}` (not ending with `RETURN`) +* Read/Write nested simple updating subqueries of the form `DO { ... }` (inner query not ending with `RETURN`) +* Read/Write nested conditionally updating subqueries of the form `DO [WHEN cond THEN { ... }]+ [ELSE { ... }] END` (inner queries not ending with `RETURN`) -A nested major subquery just consists of an inner query in curly braces. +A nested simple subquery just consists of an inner query in curly braces. -All nested non-major subqueries are introduced with a keyword in conjunction with an inner query in curly braces. +All other nested subquery constructs are introduced with a keyword in conjunction with an inner query in curly braces. Nested subqueries may be correlated - i.e. the inner query may use variables from the outer query - or uncorrelated. Nested subqueries can be contained within other nested subqueries at an arbitrary (but finite) depth. -Finally, this CIP proposes new shorthand syntax for returning no fields through the introduction of `WITH -`, `RETURN -`, and `YIELD -`. +Read/Write nested subqueries cannot be contained within other read-only nested subqueries. +Finally, this CIP proposes new shorthand syntax for starting a query with `WHERE` and for returning no fields through the introduction of `WITH -`, `RETURN -`, and `YIELD -`. -**1. Read-only nested major subqueries** -We propose the addition of read-only nested major subqueries as a new form of read-only Cypher query. +**1. Read-only nested simple subqueries** -A nested read-only major subquery is denoted using the following syntax: `{ }`. +We propose the addition of read-only nested simple subqueries as a new form of read-only Cypher query. + +A nested read-only simple subquery is denoted using the following syntax: `{ }`. The inner query could be any complete read-only Cypher query. -A nested read-only major subquery may only be used as a primary clause, i.e. as a +A nested read-only simple subquery may only be used as a primary clause, i.e. 
as a * top-level Cypher query, * inner query of another nested subquery, * inner query of another expression-level subquery (like a pattern comprehension, or an `EXISTS` subquery), * argument query to `UNION` and similar clause-level binary operators -A nested read-only major subquery may not be used as a secondary clause after a preceding primary clause +A nested read-only simple subquery may not be used as a secondary clause after a preceding primary clause (However, a nested read-only chained subquery may be used in this case). @@ -94,18 +97,31 @@ We propose the addition of a new `MANDATORY` clause for expressing read-only nes A read-only nested mandatory subquery is denoted using the following syntax: `MANDATORY { }`. -**4. Read/Write nested subqueries** +**4. Read/Write nested simple updating subqueries** -We propose the addition of a new `DO` clause for expressing read/write nested subqueries that _do no return any data_. +We propose the addition of a new `DO` clause for expressing read/write nested simple updating subqueries that _do not return any data_. -A read/write nested subquery is denoted using the following syntax: `DO [WHEN predicate] { }`. +A read/write nested simple updating subquery is denoted using the following syntax: `DO { }`. Any updating Cypher query from which the trailing final `RETURN` clause has been omitted may be used as an inner update query. We additionally propose removing the `FOREACH` clause from the current language as it is rendered obsolete by the introduction of `DO`. -**5. Shorthand syntax** +**5. Read/Write nested conditionally updating subqueries** + +We propose the addition of a second form of the `DO` clause for expressing read/write nested conditionally updating subqueries that _do not return any data_. + +A read/write nested conditionally updating subquery is denoted using the following syntax: `DO [WHEN cond THEN { ... }]+ [ELSE { ... }] END`. 
+ +Semantically, the `WHEN` conditions are tested in the order given, and the inner updating query is executed for the first condition that evaluates to true only. +If no given `WHEN` condition evaluates to true and an `ELSE` branch was given, the inner updating query of the `ELSE` branch is executed. +If no given `WHEN` condition evaluates to true and no `ELSE` branch was given, no updates will be executed. + + +**6. Shorthand syntax** + +We propose the addition of a new clause `WHERE ` as a shorthand syntax for `WITH * WHERE THEN { }` that may be used as a primary clause only (e.g. as the first clause of a nested subquery). We propose the addition of a new projection clauses of the form `WITH -` and `RETURN -` which will retain the input cardinality but project no result fields, i.e. this allows for *only* checking the cardinality in a read-only nested mandatory subquery. @@ -118,8 +134,9 @@ We propose the addition of a new subclause to `CALL` of the form `YIELD -` which Conceptually, a nested subquery is evaluated for each incoming record and may produce an arbitrary number of result records. -All incoming variables remain in scope. +All incoming variables remain in scope throughout the whole subquery. Any new variable bindings introduced by the final `RETURN` clause when evaluating the subquery will augment the variable bindings of the initial record. +It is valid (though redundant) if incoming variables from the outer scope are passed on explicitly by any projection clause of the subquery (including the final `RETURN`). Therefore, nested subqueries cannot shadow variables present in the outer scope, and thus behave in the same way as `UNWIND` and `CALL` with regard to the introduction of new variable bindings. Any other variable bindings that are introduced temporarily in the subquery will not be visible to the outer scope. @@ -128,17 +145,17 @@ Subqueries interact with write clauses in the same way as `MATCH` does. **2. 
Read/Write subqueries** -Execution of a `DO` subquery does not change the cardinality; i.e. the inner update query is run for each incoming record (optionally filtered by the given predicate if a `WHEN` sub-clause is present). +Execution of a `DO` subquery does not change the cardinality; i.e. the inner update query is run for each incoming record. -Any input record is always passed on to the clause succeeding the `DO` subquery, irrespective of whether it was eligible for processing by the inner update query. +Any input record is always passed on to the clause succeeding the `DO` subquery, irrespective of whether it was eligible for processing by any inner update query. -A `DO` clause that uses `WHEN` sub-clause is called _conditional DO_. +A `DO` clause that uses a `WHEN` sub-clause is called a _conditional DO_. A query may end with a `DO` subquery in the same way that a query can currently end with any update clause. === Examples -**1. Read-only nested subqueries** +**1. Read-only nested simple and chained subqueries** Post-UNION processing: [source, cypher] @@ -245,11 +262,13 @@ This proposal also provides nested subquery forms of `OPTIONAL MATCH` and `MANDA [source, cypher] ---- MANDATORY MATCH (p:Person {name: 'Petra'}) +MANDATORY MATCH (conf:Conference {name: $conf}) MANDATORY { - MATCH (p)-[:ATTENDS]->(conf:Conference {name: $conf}) + WHERE conf.impact > 5 + MATCH (p)-[:ATTENDS]->(conf) RETURN conf UNION - MATCH (p)-[:LIVES_IN]->(:City)<-[:IN]-(conf:Conference {name: $conf}) + MATCH (p)-[:LIVES_IN]->(:City)<-[:IN]-(conf) RETURN conf } OPTIONAL { @@ -263,7 +282,7 @@ RETURN name ---- -**3. Read/Write nested subqueries** +**3. Read/Write nested simple and conditionally updating subqueries** We illustrate these by means of an 'old' version of the query, in which `FOREACH` is used, followed by the 'new' version, using `DO`. 
@@ -330,10 +349,15 @@ Conditional `DO` ---- MATCH (r:Root) UNWIND range(1, 10) AS x -DO WHEN x % 2 = 1 { - MERGE (c:Odd:Child {id: x}) - MERGE (r)-[:PARENT]->(c) -} +DO WHEN x % 2 = 1 THEN { + MERGE (c:Odd:Child {id: x}) + MERGE (r)-[:PARENT]->(c) + } + ELSE { + MERGE (c:Even:Child {id: x}) + MERGE (r)-[:PARENT]->(c) + } +END ---- From 70a91cd5cf325ce99f08242b0d5550ac1c1864fa Mon Sep 17 00:00:00 2001 From: Petra Selmer Date: Fri, 21 Apr 2017 17:51:21 +0100 Subject: [PATCH 13/27] Textual improvements --- cip/CIP2016-06-22-nested-subqueries.adoc | 75 ++++++++++++++---------- 1 file changed, 45 insertions(+), 30 deletions(-) diff --git a/cip/CIP2016-06-22-nested-subqueries.adoc b/cip/CIP2016-06-22-nested-subqueries.adoc index c252d80986..68d7215668 100644 --- a/cip/CIP2016-06-22-nested-subqueries.adoc +++ b/cip/CIP2016-06-22-nested-subqueries.adoc @@ -23,12 +23,12 @@ Subqueries - i.e. queries within queries - are a powerful and expressive feature * Better query construction and readability * Easier query composition and reuse * Post-processing as a single unit results from multiple queries - * Perform a sequence of multiple write commands for each record + * Performing a sequence of multiple write commands for each record == Background -This CIP may be viewed in light of the EXISTS CIP and the forthcoming Pattern Comprehension CIP, both of which propose variants of subqueries. -In contrast, this CIP focusses on subqueries operating at a clause level while both the EXISTS CIP and the forthcoming Pattern Comprehension CIP propose subqueries operating at an expression level. +This CIP may be viewed in light of the EXISTS CIP, the Scalar Subqueries and List Subqueries CIP, and the Map Projection CIP, all of which propose variants of subqueries. +In contrast, this CIP focusses on subqueries operating at a clause level while the EXISTS CIP and Map Projection CIP propose subqueries operating at an expression level. 
== Proposal @@ -41,9 +41,9 @@ This proposal suggests the introduction of new nested subquery constructs to Cyp * Read-only nested optional subqueries of the form `OPTIONAL { ... RETURN ... }` * Read-only nested mandatory subqueries of the form `MANDATORY { ... RETURN ... }` * Read/Write nested simple updating subqueries of the form `DO { ... }` (inner query not ending with `RETURN`) -* Read/Write nested conditionally updating subqueries of the form `DO [WHEN cond THEN { ... }]+ [ELSE { ... }] END` (inner queries not ending with `RETURN`) +* Read/Write nested conditionally-updating subqueries of the form `DO [WHEN cond THEN { ... }]+ [ELSE { ... }] END` (inner queries not ending with `RETURN`) -A nested simple subquery just consists of an inner query in curly braces. +A nested simple subquery consists of an inner query in curly braces. All other nested subquery constructs are introduced with a keyword in conjunction with an inner query in curly braces. @@ -53,7 +53,7 @@ Nested subqueries can be contained within other nested subqueries at an arbitrar Read/Write nested subqueries cannot be contained within other read-only nested subqueries. -Finally, this CIP proposes new shorthand syntax for starting a query with `WHERE` and for returning no fields through the introduction of `WITH -`, `RETURN -`, and `YIELD -`. +Finally, this CIP proposes new shorthand syntax for starting a query with `WHERE`, along with the ability to specify that no fields are to be returned through the introduction of `WITH -`, `RETURN -`, and `YIELD -`. **1. Read-only nested simple subqueries** @@ -62,17 +62,17 @@ We propose the addition of read-only nested simple subqueries as a new form of r A nested read-only simple subquery is denoted using the following syntax: `{ }`. -The inner query could be any complete read-only Cypher query. +The inner query can be any complete read-only Cypher query. A nested read-only simple subquery may only be used as a primary clause, i.e. 
as a * top-level Cypher query, * inner query of another nested subquery, -* inner query of another expression-level subquery (like a pattern comprehension, or an `EXISTS` subquery), +* inner query of another expression-level subquery (such as a pattern comprehension, or an `EXISTS` subquery), * argument query to `UNION` and similar clause-level binary operators -A nested read-only simple subquery may not be used as a secondary clause after a preceding primary clause -(However, a nested read-only chained subquery may be used in this case). +A nested read-only simple subquery may not be used as a secondary clause after a preceding primary clause. +(However, a nested read-only chained subquery may be used in this case.) **2. Read-only nested chained subqueries** @@ -87,45 +87,58 @@ After a chain of clauses, a new nested chained subquery may be introduced as a s We propose the addition of a new `OPTIONAL` clause for expressing read-only nested optional subqueries. -A read-only nested optional subquery is denoted using the following syntax: `OPTIONAL { }`. +A read-only nested optional subquery is denoted by the following syntax: `OPTIONAL { }`. **4. Read-only nested mandatory subqueries** We propose the addition of a new `MANDATORY` clause for expressing read-only nested mandatory subqueries. -A read-only nested mandatory subquery is denoted using the following syntax: `MANDATORY { }`. +A read-only nested mandatory subquery is denoted by the following syntax: `MANDATORY { }`. **4. Read/Write nested simple updating subqueries** -We propose the addition of a new `DO` clause for expressing read/write nested simple updating subqueries that _do no return any data_. +We propose the addition of a new `DO` clause for expressing read/write nested simple updating subqueries that _do not return any data_. -A read/write nested simple udating subquery is denoted using the following syntax: `DO { }`. 
+A read/write nested simple updating subquery is denoted by the following syntax: `DO { }`. Any updating Cypher query from which the trailing final `RETURN` clause has been omitted may be used as an inner update query. We additionally propose removing the `FOREACH` clause from the current language as it is rendered obsolete by the introduction of `DO`. -**5. Read/Write nested conditionally udating subqueries** +**5. Read/Write nested conditionally-updating subqueries** -We propose the addition of a second form of the `DO` clause for expressing read/write nested conditionally updating subqueries that _do no return any data_. +We propose the addition of a second form of the `DO` clause for expressing read/write nested conditionally-updating subqueries that _do not return any data_. -A read/write nested conditionally updating subquery is denoted using the following syntax: `DO { [WHEN THEN ]+ [ELSE ] END`. +A read/write nested conditionally-updating subquery is denoted by the following syntax: -Semantically, the `WHEN` conditions are tested in the order given, and the inner updating query is executed for the first condition that evaluates to true only. -If no given `WHEN` condition evaluates to true and an `ELSE` branch was given, the inner updating query of the `ELSE` branch is executed. -If no given `WHEN` condition evaluates to true and no `ELSE` branch was given, no updates will be executed. +``` +DO + [WHEN THEN ]+ + [ELSE ] +END +``` + + +Evaluation proceeds as follows: + +* Semantically, the `WHEN` conditions are tested in the order given, and the inner updating query is executed for only the first condition that evaluates to `true`. +* If no given `WHEN` condition evaluates to `true` and an `ELSE` branch is provided, the inner updating query of the `ELSE` branch is executed. +* If no given `WHEN` condition evaluates to `true` and no `ELSE` branch is provided, no updates will be executed. **6. 
Shorthand syntax** -We propose the addition of a new clause `WHERE ` as a shorthand syntax for `WITH * WHERE THEN { }` that may be used as a primary clause only (e.g. as the first clause of a nested subquery). +We propose the addition of a new clause `WHERE ` as a shorthand syntax for `WITH * WHERE THEN { }`. +The idea is for this to be used exclusively as a primary clause; for example, as the first clause of a nested subquery. -We propose the addition of a new projection clauses of the form `WITH -` and `RETURN -` which will retain the input cardinality but project no result fields, i.e. this allows for *only* checking the cardinality in a read-only nested mandatory subquery. +We propose the addition of a new projection clauses of the form `WITH -` and `RETURN -`, which will retain the input cardinality but project no result fields. +This allows for *only* checking the cardinality in a read-only nested mandatory subquery. -We propose the addition of a new subclause to `CALL` of the form `YIELD -` which will retain the output cardinality of a call but project no result fields, i.e. this allows for *only* checking the cardinality in an `EXISTS` subquery. +We propose the addition of a new subclause to `CALL` of the form `YIELD -`, which will retain the output cardinality of a call but project no result fields. +This allows for *only* checking the cardinality in an `EXISTS` subquery. === Semantic clarification @@ -134,11 +147,13 @@ We propose the addition of a new subclause to `CALL` of the form `YIELD -` which Conceptually, a nested subquery is evaluated for each incoming record and may produce an arbitrary number of result records. -All incoming variables remain in scope throughout the whole subquery. -Any new variable bindings introduced by the final `RETURN` clause when evaluating the subquery will augment the variable bindings of the initial record. 
-It is valid (though redundant) if incoming variables from the outer scope are passed on explicitly by any projection clause of the subquery (including the final `RETURN`). -Therefore, nested subqueries cannot shadow variables present in the outer scope, and thus behave in the same way as `UNWIND` and `CALL` with regard to the introduction of new variable bindings. -Any other variable bindings that are introduced temporarily in the subquery will not be visible to the outer scope. +The rules regarding variable scoping are detailed as follows: + +* All incoming variables remain in scope throughout the whole subquery. +* When evaluating the subquery, any new variable bindings introduced by the final `RETURN` clause will augment the variable bindings of the initial record. +* It is valid (though redundant) if incoming variables from the outer scope are passed on explicitly by any projection clause of the subquery (including the final `RETURN`). +* Nested subqueries therefore cannot shadow variables present in the outer scope, and thus behave in the same way as `UNWIND` and `CALL` with regard to the introduction of new variable bindings. +* Any other variable bindings that are introduced temporarily in the subquery will not be visible to the outer scope. Subqueries interact with write clauses in the same way as `MATCH` does. @@ -282,7 +297,7 @@ RETURN name ---- -**3. Read/Write nested simple and conditionally updating subqueries** +**3. Read/Write nested simple and conditionally-updating subqueries** We illustrate these by means of an 'old' version of the query, in which `FOREACH` is used, followed by the 'new' version, using `DO`. @@ -418,7 +433,7 @@ FROM FROM Orders) AS D ---- -Both scalar and table expression subqueries are out of scope for the purposes of this CIP. They will be addressed in forthcoming CIPs. +Scalar and list subqueries are addressed in the Scalar Subqueries and List Subqueries CIP. 
=== SPARQL From 1f02e2b35deb619fab8e951dab579df8f53ed7e6 Mon Sep 17 00:00:00 2001 From: Petra Selmer Date: Fri, 21 Apr 2017 18:16:48 +0100 Subject: [PATCH 14/27] Refer to Query Combinator CIP --- cip/CIP2016-06-22-nested-subqueries.adoc | 1 + 1 file changed, 1 insertion(+) diff --git a/cip/CIP2016-06-22-nested-subqueries.adoc b/cip/CIP2016-06-22-nested-subqueries.adoc index 68d7215668..d4a50e9175 100644 --- a/cip/CIP2016-06-22-nested-subqueries.adoc +++ b/cip/CIP2016-06-22-nested-subqueries.adoc @@ -81,6 +81,7 @@ We propose the addition of read-only nested chained subqueries for using nested This is called _subquery chaining_. After a chain of clauses, a new nested chained subquery may be introduced as a secondary clause using the `THEN` keyword followed by an inner query in curly braces, i.e. it is denoted using the following syntax: `... THEN { }`. +`THEN` is a query combinator and more details may be found in the Query Combinator CIP. **3. Read-only nested optional subqueries** From cc176e87813be87897d9194fe4972fdacd5f6217 Mon Sep 17 00:00:00 2001 From: Stefan Plantikow Date: Tue, 2 May 2017 01:15:23 +0200 Subject: [PATCH 15/27] Wording --- cip/CIP2016-06-22-nested-subqueries.adoc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cip/CIP2016-06-22-nested-subqueries.adoc b/cip/CIP2016-06-22-nested-subqueries.adoc index d4a50e9175..f6fc90dff7 100644 --- a/cip/CIP2016-06-22-nested-subqueries.adoc +++ b/cip/CIP2016-06-22-nested-subqueries.adoc @@ -22,7 +22,7 @@ Subqueries - i.e. 
queries within queries - are a powerful and expressive feature * Increased query expressivity * Better query construction and readability * Easier query composition and reuse - * Post-processing as a single unit results from multiple queries + * Post-processing results from multiple queries as a single unit * Performing a sequence of multiple write commands for each record == Background @@ -77,10 +77,10 @@ A nested read-only simple subquery may not be used as a secondary clause after a **2. Read-only nested chained subqueries** -We propose the addition of read-only nested chained subqueries for using nested subqueries as a secondary clause. +We propose the addition of read-only nested chained subqueries for using nested subqueries in a similar position as a secondary clause. This is called _subquery chaining_. -After a chain of clauses, a new nested chained subquery may be introduced as a secondary clause using the `THEN` keyword followed by an inner query in curly braces, i.e. it is denoted using the following syntax: `... THEN { }`. +After a chain of clauses that together form a query, a new nested chained subquery may be introduced as a secondary clause using the `THEN` keyword followed by an inner query in curly braces, i.e. it is denoted using the following syntax: `... THEN { }`. `THEN` is a query combinator and more details may be found in the Query Combinator CIP. 
From 29211122d9f99dd874ff722aea5f742ac27b30e3 Mon Sep 17 00:00:00 2001 From: Stefan Plantikow Date: Mon, 16 Oct 2017 17:19:47 +0200 Subject: [PATCH 16/27] Rework CIP - Clear separation between additive and replacing semantics - Additive semantics for nesting with {} - Replacing semantics for flat composition - Use THEN for discard cardinality - Use WITH|RETURN|YIELD NOTHING for discard fields --- ...sted-updating-and-chained-subqueries.adoc} | 200 ++++++++++-------- 1 file changed, 111 insertions(+), 89 deletions(-) rename cip/{CIP2016-06-22-nested-subqueries.adoc => CIP2016-06-22-nested-updating-and-chained-subqueries.adoc} (50%) diff --git a/cip/CIP2016-06-22-nested-subqueries.adoc b/cip/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc similarity index 50% rename from cip/CIP2016-06-22-nested-subqueries.adoc rename to cip/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc index f6fc90dff7..c4487baf85 100644 --- a/cip/CIP2016-06-22-nested-subqueries.adoc +++ b/cip/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc @@ -1,4 +1,4 @@ -= CIP2016-06-22 - Nested subqueries += CIP2016-06-22 - Nested, updating, and chained subqueries :numbered: :toc: :toc-placement: macro @@ -9,7 +9,7 @@ [abstract] .Abstract -- -This CIP proposes the incorporation of nested subqueries into Cypher. +This CIP proposes the incorporation of nested, updating, and chained subqueries into Cypher. -- toc::[] @@ -21,162 +21,165 @@ Subqueries - i.e. 
queries within queries - are a powerful and expressive feature * Increased query expressivity * Better query construction and readability - * Easier query composition and reuse + * Easier composition of simple query pipelines * Post-processing results from multiple queries as a single unit * Performing a sequence of multiple write commands for each record == Background -This CIP may be viewed in light of the EXISTS CIP, the Scalar Subqueries and List Subqueries CIP, and the Map Projection CIP, all of which propose variants of subqueries. -In contrast, this CIP focusses on subqueries operating at a clause level while the EXISTS CIP and Map Projection CIP propose subqueries operating at an expression level. +This CIP may be viewed in light of CIPs for query combinators and set operations, `EXISTS`, scalar subqueries, and list subqueries. == Proposal -Nested subqueries are self-contained Cypher queries that are usually run within the scope of an outer Cypher query. +Subqueries are self-contained Cypher queries that are usually run within the scope of an outer, containing Cypher query. -This proposal suggests the introduction of new nested subquery constructs to Cypher. +This proposal suggests the introduction of new subquery constructs to Cypher. -* Read-only nested simple subqueries of the form `{ ... RETURN ... }` -* Read-only nested chained subqueries of the form `THEN { ... RETURN ... }` -* Read-only nested optional subqueries of the form `OPTIONAL { ... RETURN ... }` -* Read-only nested mandatory subqueries of the form `MANDATORY { ... RETURN ... }` -* Read/Write nested simple updating subqueries of the form `DO { ... }` (inner query not ending with `RETURN`) -* Read/Write nested conditionally-updating subqueries of the form `DO [WHEN cond THEN { ... }]+ [ELSE { ... 
}] END` (inner queries not ending with `RETURN`) +* Read-only nested subqueries +** Read-only nested regular subqueries of the form `MATCH { }` +** Read-only nested optional subqueries of the form `OPTIONAL MATCH { }` +** Read-only nested mandatory subqueries of the form `MANDATORY MATCH { }` +* Read/Write updating subqueries +** Read/Write simple updating subqueries of the form `DO { }` (inner query not ending with `RETURN`) +** Read/Write conditionally-updating subqueries of the form `DO [WHEN THEN { }]+ [ELSE { }] END` (inner queries not ending with `RETURN`) +* Chained subqueries +** Chained data-dependent subqueries by extending the `WITH` projection clause that have the form `<Q1> WITH ... <Q2>`. Additionally, this CIP proposes new shorthand syntax for starting a query with `WITH` to compose a query with external inputs. +** Chained data-independent subqueries by introducing the new `THEN` clause for discarding all variables in scope as well as the cardinality of all input records. Additionally, this CIP proposes new shorthand syntax for discarding all variables in scope without discarding the cardinality of input records using `WITH|RETURN|YIELD NOTHING`. -A nested simple subquery consists of an inner query in curly braces. +We additionally propose removing the `FOREACH` clause from the current language (it is rendered obsolete by the introduction of `DO`). -All other nested subquery constructs are introduced with a keyword in conjunction with an inner query in curly braces. +Subquery constructs are always introduced with one or more keywords in conjunction with an inner query in curly braces. -Nested subqueries may be correlated - i.e. the inner query may use variables from the outer query - or uncorrelated. +Subqueries may be correlated - i.e. the inner query may use variables from the outer query - or uncorrelated. -Nested subqueries can be contained within other nested subqueries at an arbitrary (but finite) depth.
+Subqueries can be contained within other subqueries at an arbitrary (but finite) depth. -Read/Write nested subqueries cannot be contained within other read-only nested subqueries. +Read/Write subqueries cannot be contained within other read-only subqueries. -Finally, this CIP proposes new shorthand syntax for starting a query with `WHERE`, along with the ability to specify that no fields are to be returned through the introduction of `WITH -`, `RETURN -`, and `YIELD -`. +=== Read-only nested subqueries -**1. Read-only nested simple subqueries** +Conceptually, a nested subquery is evaluated for each incoming input record and may produce an arbitrary number of output records. -We propose the addition of read-only nested simple subqueries as a new form of read-only Cypher query. +==== Read-only nested regular subqueries -A nested read-only simple subquery is denoted using the following syntax: `{ }`. +We propose the addition of read-only nested regular subqueries as a new form of read-only Cypher query. -The inner query can be any complete read-only Cypher query. +A read-only nested regular subquery is denoted using the following syntax: `MATCH { }`. -A nested read-only simple subquery may only be used as a primary clause, i.e. as a +The inner query can be any complete read-only Cypher query. -* top-level Cypher query, -* inner query of another nested subquery, -* inner query of another expression-level subquery (such as a pattern comprehension, or an `EXISTS` subquery), -* argument query to `UNION` and similar clause-level binary operators +==== Read-only nested optional subqueries -A nested read-only simple subquery may not be used as a secondary clause after a preceding primary clause. -(However, a nested read-only chained subquery may be used in this case.) +We propose extending the `OPTIONAL MATCH` clause to express read-only nested optional subqueries. +A read-only nested optional subquery is denoted by the following syntax: `OPTIONAL MATCH { }`. -**2.
Read-only nested chained subqueries** +==== Read-only nested mandatory subqueries -We propose the addition of read-only nested chained subqueries for using nested subqueries in a similar position as a secondary clause. -This is called _subquery chaining_. +We propose extending the `MANDATORY MATCH` clause to express read-only nested mandatory subqueries. -After a chain of clauses that together form a query, a new nested chained subquery may be introduced as a secondary clause using the `THEN` keyword followed by an inner query in curly braces, i.e. it is denoted using the following syntax: `... THEN { }`. -`THEN` is a query combinator and more details may be found in the Query Combinator CIP. +A read-only nested mandatory subquery is denoted by the following syntax: `MANDATORY MATCH { }`. +==== Semantics -**3. Read-only nested optional subqueries** +The nested subquery will be provided with all variables visible in the outer query as subquery input. -We propose the addition of a new `OPTIONAL` clause for expressing read-only nested optional subqueries. +All records returned by the final `RETURN` clause of the subquery will be augmented with the variable bindings of the initial input record from the outer query to form the output records of the subquery. +No other variable bindings will be added to the output records. +If an incoming variable is either discarded or shadowed within the subquery, an error will be raised if the subquery returns that variable to the outer query. -A read-only nested optional subquery is denoted by the following syntax: `OPTIONAL { }`. +Finally, the result records of the different forms of nested subqueries are formed as follows: +* The result records of a read-only regular subquery are just the output records.
+* The result records of a read-only optional subquery are all the output records (if there is at least one output record), or a single record with the same fields as the output records where all newly introduced variable bindings are set to `NULL`. +* The result records of a read-only mandatory subquery are just the output records. However, if the set of output records is empty, an error is raised in the same way as regular `MANDATORY MATCH`. -**4. Read-only nested mandatory subqueries** +Nested subqueries interact with write clauses in the same way as `MATCH` does. -We propose the addition of a new `MANDATORY` clause for expressing read-only nested mandatory subqueries. -A read-only nested mandatory subquery is denoted by the following syntax: `MANDATORY { }`. +=== Read/Write updating subqueries +Updating subqueries never change the cardinality; i.e. the inner update query is run for each incoming input record. -**4. Read/Write nested simple updating subqueries** +==== Read/Write simple updating subqueries -We propose the addition of a new `DO` clause for expressing read/write nested simple updating subqueries that _do not return any data_. +We propose the addition of a new `DO` clause for expressing read/write simple updating subqueries that _do not return any data_ from the inner query. -A read/write nested simple updating subquery is denoted by the following syntax: `DO { }`. +A read/write simple updating subquery is denoted by the following syntax: `DO { }`. Any updating Cypher query from which the trailing final `RETURN` clause has been omitted may be used as an inner update query. -We additionally propose removing the `FOREACH` clause from the current language as it is rendered obsolete by the introduction of `DO`. - +A query may end with a `DO` subquery in the same way that a query can currently end with any update clause. -**5. 
Read/Write nested conditionally-updating subqueries** +==== Read/Write conditionally-updating subqueries -We propose the addition of a second form of the `DO` clause for expressing read/write nested conditionally-updating subqueries that _do not return any data_. +We propose the addition of a new conditional `DO` clause for expressing read/write conditionally-updating subqueries that _do not return any data_ from the inner query. -A read/write nested conditionally-updating subquery is denoted by the following syntax: +A read/write conditionally-updating subquery is denoted by the following syntax: ``` DO - [WHEN THEN ]+ + [WHEN THEN ]+ [ELSE ] END ``` - Evaluation proceeds as follows: -* Semantically, the `WHEN` conditions are tested in the order given, and the inner updating query is executed for only the first condition that evaluates to `true`. -* If no given `WHEN` condition evaluates to `true` and an `ELSE` branch is provided, the inner updating query of the `ELSE` branch is executed. -* If no given `WHEN` condition evaluates to `true` and no `ELSE` branch is provided, no updates will be executed. +* Semantically, the `WHEN` predicates are tested in the order given, and the inner updating query is executed for only the first predicate that evaluates to `true`. +* If no given `WHEN` predicate evaluates to `true` and an `ELSE` branch is provided, the inner updating query of the `ELSE` branch is executed. +* If no given `WHEN` predicate evaluates to `true` and no `ELSE` branch is provided, no updates will be executed. +A query may end with a conditional `DO` subquery in the same way that a query can currently end with any update clause. -**6. Shorthand syntax** -We propose the addition of a new clause `WHERE ` as a shorthand syntax for `WITH * WHERE THEN { }`. -The idea is for this to be used exclusively as a primary clause; for example, as the first clause of a nested subquery.
+=== Chained subqueries -We propose the addition of a new projection clauses of the form `WITH -` and `RETURN -`, which will retain the input cardinality but project no result fields. -This allows for *only* checking the cardinality in a read-only nested mandatory subquery. +==== Chained data-dependent subqueries -We propose the addition of a new subclause to `CALL` of the form `YIELD -`, which will retain the output cardinality of a call but project no result fields. -This allows for *only* checking the cardinality in an `EXISTS` subquery. +We propose extending the `WITH` projection clause to sequentially compose arbitrary queries to form a chained data-dependent subquery without resorting to nesting and indentation (e.g. as a short-hand syntax for post-UNION processing). +Chained data-dependent subqueries have the following general form `<Q1> WITH ... <Q2>`. -=== Semantic clarification +Both `<Q1>` and `<Q2>` are arbitrary, complete Cypher queries. -**1. Read-only nested subqueries** +Conceptually, the query `<Q2>` is evaluated for each incoming input record from the query `<Q1>` and may produce an arbitrary number of result records. +In other words, the query `<Q2>` will be provided with all variables returned by the query `<Q1>` as input variable bindings. -Conceptually, a nested subquery is evaluated for each incoming record and may produce an arbitrary number of result records. +Furthermore, this CIP proposes allowing a leading `WITH` to project variables from expressions that refer to unbound variables from the preceding scope (or query). +This set of referenced, unbound variables of such a leading `WITH` is understood to implicitly declare the input variables required for the query to execute. -The rules regarding variable scoping are detailed as follows: +Note:: This mechanism allows composing a Cypher query with inputs that have been constructed programmatically. -* All incoming variables remain in scope throughout the whole subquery.
-* When evaluating the subquery, any new variable bindings introduced by the final `RETURN` clause will augment the variable bindings of the initial record. -* It is valid (though redundant) if incoming variables from the outer scope are passed on explicitly by any projection clause of the subquery (including the final `RETURN`). -* Nested subqueries therefore cannot shadow variables present in the outer scope, and thus behave in the same way as `UNWIND` and `CALL` with regard to the introduction of new variable bindings. -* Any other variable bindings that are introduced temporarily in the subquery will not be visible to the outer scope. +==== Chained data-independent subqueries -Subqueries interact with write clauses in the same way as `MATCH` does. +We propose introducing the `THEN` projection clause to sequentially compose two arbitrary subqueries to form a chained data-independent subquery without resorting to nesting and indentation. +Chained data-independent subqueries have the following general form `<Q1> THEN <Q2>`. -**2. Read/Write subqueries** +Both `<Q1>` and `<Q2>` are arbitrary, complete Cypher queries. +No variables and no input records are passed from `<Q1>` to `<Q2>`. +Instead, `<Q2>` is executed in a standalone fashion after the execution of `<Q1>` has finished. -Execution of a `DO` subquery does not change the cardinality; i.e. the inner update query is run for each incoming record. +Furthermore, this CIP proposes allowing queries to start with a leading `THEN` for discarding all variables in scope as well as the cardinality of all input records provided by the surrounding execution environment. -Any input record is always passed on to the clause succeeding the `DO` subquery, irrespective of whether it was eligible for processing by any inner update query. +Note:: This mechanism allows guaranteed execution of `<Q2>` irrespective of the number of records produced by `<Q1>`. -A `DO` clause that uses `WHEN` sub-clause is called a _conditional DO_.
+Note:: In general, `<Q1>` is expected to be an updating query and it is recommended that implementations generate a warning if this is not the case (to inform the user that `<Q1>` is essentially superfluous). -A query may end with a `DO` subquery in the same way that a query can currently end with any update clause. +==== Discarding variables in scope + +Finally, this CIP proposes new shorthand syntax for discarding all variables in scope without discarding the cardinality of input records using `WITH|RETURN|YIELD NOTHING`. === Examples -**1. Read-only nested simple and chained subqueries** +==== Read-only nested regular subqueries Post-UNION processing: [source, cypher] ---- -{ +MATCH { // authored tweets MATCH (me:User {name: 'Alice'})-[:FOLLOWS]->(user:User), (user)<-[:AUTHORED]-(tweet:Tweet) @@ -197,7 +200,7 @@ Uncorrelated nested subquery: [source, cypher] ---- MATCH (f:Farm {id: $farmId}) -THEN { +MATCH { MATCH (u:User {id: $userId})-[:LIKES]->(b:Brand), (b)-[:PRODUCES]->(p:Lawnmower) RETURN b.name AS name, p.code AS code @@ -214,7 +217,7 @@ Correlated nested subquery: [source, cypher] ---- MATCH (f:Farm {id: $farmId})-[:IS_IN]->(country:Country) -THEN { +MATCH { MATCH (u:User {id: $userId})-[:LIKES]->(b:Brand), (b)-[:PRODUCES]->(p:Lawnmower) RETURN b.name AS name, p.code AS code @@ -233,7 +236,7 @@ Filtered and correlated nested subquery: ---- MATCH (f:Farm)-[:IS_IN]->(country:Country) WHERE country.name IN $countryNames -THEN { +MATCH { MATCH (u:User {id: $userId})-[:LIKES]->(b:Brand), (b)-[:PRODUCES]->(p:Lawnmower) RETURN b AS brand, p.code AS code @@ -253,9 +256,9 @@ Doubly-nested subquery: [source, cypher] ---- MATCH (f:Farm {id: $farmId}) -THEN { +MATCH { MATCH (c:Customer)-[:BUYS_FOOD_AT]->(f) - THEN { + MATCH { MATCH (c)-[:RETWEETS]->(t:Tweet)<-[:TWEETED_BY]-(f) RETURN c, count(*) AS count UNION @@ -271,7 +274,7 @@ THEN { RETURN f.name AS name, type, sum(endorsement) AS endorsement ---- -**2.
Read-only nested optional match and mandatory subqueries** +==== Read-only nested optional and mandatory subqueries This proposal also provides nested subquery forms of `OPTIONAL MATCH` and `MANDATORY MATCH`: @@ -279,15 +282,15 @@ This proposal also provides nested subquery forms of `OPTIONAL MATCH` and `MANDA ---- MANDATORY MATCH (p:Person {name: 'Petra'}) MANDATORY MATCH (conf:Conference {name: $conf}) -MANDATORY { - WHERE conf.impact > 5 +MANDATORY MATCH { + WITH * WHERE conf.impact > 5 MATCH (p)-[:ATTENDS]->(conf) RETURN conf UNION MATCH (p)-[:LIVES_IN]->(:City)<-[:IN]-(conf) RETURN conf } -OPTIONAL { +OPTIONAL MATCH { MATCH (p)-[:KNOWS]->(a:Attendee)-[:PUBLISHED_AT]->(conf) RETURN a.name AS name UNION @@ -298,7 +301,7 @@ RETURN name ---- -**3. Read/Write nested simple and conditionally-updating subqueries** +==== Read/Write simple updating and conditionally-updating subqueries We illustrate these by means of an 'old' version of the query, in which `FOREACH` is used, followed by the 'new' version, using `DO`. @@ -376,12 +379,31 @@ DO WHEN x % 2 = 1 THEN { END ---- +==== Chained subqueries + +Combining nested and chained subqueries: +[source, cypher] +---- +MATCH (x)-[:IN]->(:Category {name: "A"}) +WITH x LIMIT 5 +MATCH (x)-[:FROM]-(c :City) +RETURN x, c +UNION +MATCH (x)-[:IN]->(:Category {name: "A"}) +WITH x LIMIT 10 +MATCH (x)-[:FROM]-(c :City) +// This finishes the right arm of the UNION +RETURN x, c +// This applies to the whole UNION +WITH x.name AS name ORDER BY x.age +RETURN x LIMIT 10 +---- === Interaction with existing features Apart from the suggested deprecation of the `FOREACH` clause, nested read-only, write-only and read-write subqueries do not interact directly with any existing features.
-=== Alternatives +== Alternatives Alternative syntax has been considered during the production of this document: From 3ed1ca99590aaa7242f43d834320d479aeb3d86e Mon Sep 17 00:00:00 2001 From: Stefan Plantikow Date: Mon, 16 Oct 2017 17:29:47 +0200 Subject: [PATCH 17/27] Clarify precedence rules --- ...2-nested-updating-and-chained-subqueries.adoc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/cip/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc b/cip/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc index c4487baf85..6df6ec1b0b 100644 --- a/cip/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc +++ b/cip/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc @@ -168,6 +168,22 @@ Note:: This mechanism allows guaranteed execution of `` irrespective of the Note:: In general, `` is expected to be an updating query and it is recommended that implementations generate a warning if this is not the case (to inform the user that `` is essentially superfluous). +==== Precedence of subquery chaining + +This CIP proposes to distinguish between full (top-level) queries and argument queries. + +A full top-level query is any Cypher query. +In particular, a full top-level query may be formed by composing argument queries using subquery chaining or other query combinators (like `UNION`). + +An argument query is a sequence of clauses that ends in either an updating clause or `RETURN`. +An argument query may not contain subquery chaining or another query combinator. + +These rules establish left-associative precedence of subquery chaining and other query combinators over argument queries. + +In other words, ` UNION WITH ` is always interpreted as `( UNION ) WITH ` but never as ` UNION ( WITH )` (The same rule applies to `THEN`). + +Note:: Subquery chaining and other query combinators may still be used from inside an argument query via inner nested subqueries. 
+ ==== Discarding variables in scope Finally, this CIP proposes new shorthand syntax for discarding all variables in scope without discarding the cardinality of input records using `WITH|RETURN|YIELD NOTHING`. From 2d2435fa210f9a343e4ed7897be9ad2c8cdde930 Mon Sep 17 00:00:00 2001 From: Stefan Plantikow Date: Mon, 16 Oct 2017 21:55:38 +0200 Subject: [PATCH 18/27] Add ammending nested subqueries and fix query combinator precedence --- ...ested-updating-and-chained-subqueries.adoc | 60 +++++++++---------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/cip/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc b/cip/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc index 6df6ec1b0b..c4f7da61ee 100644 --- a/cip/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc +++ b/cip/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc @@ -43,8 +43,8 @@ This proposal suggests the introduction of new subquery constructs to Cypher. ** Read/Write simple updating subqueries of the form `DO { }` (inner query not ending with `RETURN`) ** Read/Write conditionally-updating subqueries of the form `DO [WHEN THEN { }]+ [ELSE { }] END` (inner queries not ending with `RETURN`) * Chained subqueries -** Chained data-dependent subqueries by extending the `WITH` projection clause that have the form ` `. Additionally, this CIP proposes new shorthand syntax for starting a query with `WITH` to compose a query with external inputs. -** Chained data-independent subqueries by introducing the new `THEN` clause for discarding all variables in scope as well as the cardinality of all input records. Additionally, this CIP proposes new shorthand syntax for discarding all variables in scope without discarding the cardinality of input records using `WITH|RETURN|YIELD NOTHING`. +** Chained data-dependent subqueries that extend the `WITH` projection clause with the form ` `. 
Additionally, this CIP proposes new shorthand syntax for starting a query with `WITH` to compose a query with external inputs. +** Chained data-independent subqueries based on the newly introduced `THEN` clause for discarding all variables in scope as well as the cardinality of all input records. Additionally, this CIP proposes new shorthand syntax for discarding all variables in scope without discarding the cardinality of input records using `WITH|RETURN|YIELD NOTHING`. We additionally propose removing the `FOREACH` clause from the current language (it is rendered obsolete by the introduction of `DO`). @@ -83,17 +83,19 @@ A read-only nested mandatory subquery is denoted by the following syntax: `MANDA ==== Semantics -The nested subquery will be provided with all variables visible in the outer query as subquery input. +The nested subquery will be provided with all variables visible in the outer query as input. -All records returned by the final `RETURN` clause of the subquery will be augmented with the variable bindings of the initial input record from the outer query to form the output records of the subquery. -No other variable bindings will be added to the output records. -If an incoming variable is either discarded or shadows within the subquery, an error will be raised if the subquery returns that variable to the outer query. +Each record returned by the final `RETURN` clause of the subquery augments the variable bindings of the initial input record from the outer query to form an output record of the subquery. +This may shadow existing bindings of the initial input record. +No other variable bindings are added to output records. + +Note:: It is recommended that implementations generate a warning if an incoming variable is both discarded within the inner query while the same inner also query returns a (potentially different) value for that variable to the outer query, i.e. is shadowing (replacing) it. 
Finally, the result records of the different forms of nested subqueries are formed as follows: * The result records of a read-only regular subquery are just the output records. * The result records of a read-only optional subquery are all the output records (if there is at least one output record), or a single record with the same fields as the output records where all newly introduced variable bindings are set to `NULL`. -* The result records of a read-only mandatory subquery are just the output records. However, if the set of output records is empty, an error is raised in the same way as regular `MANDATORY MATCH`. +* The result records of a read-only mandatory subquery are just the output records. However, if the set of output records is empty, an error is raised in the same way as regular `MANDATORY MATCH` raises an error when no matches are found. Nested subqueries interact with write clauses in the same way as `MATCH` does. @@ -104,7 +106,7 @@ Updating subqueries never change the cardinality; i.e. the inner update query is ==== Read/Write simple updating subqueries -We propose the addition of a new `DO` clause for expressing read/write simple updating subqueries that _do not return any data_ from the inner query. +We propose the addition of a new `DO` clause for expressing read/write simple updating subqueries that _do not return any data_ from the inner query to the outer query. A read/write simple updating subquery is denoted by the following syntax: `DO { }`. @@ -114,7 +116,7 @@ A query may end with a `DO` subquery in the same way that a query can currently ==== Read/Write conditionally-updating subqueries -We propose the addition of a new conditional `DO` clause for expressing read/write conditionally-updating subqueries that _do not return any data_ from the inner query. 
+We propose the addition of a new conditional `DO` clause for expressing read/write conditionally-updating subqueries that _do not return any data_ from the inner query to the outer query. A read/write conditionally-updating subquery is denoted by the following syntax: @@ -128,61 +130,59 @@ END Evaluation proceeds as follows: * Semantically, the `WHEN` predicates are tested in the order given, and the inner updating query is executed for only the first predicate that evaluates to `true`. -* If no given `WHEN` predicates evaluates to `true` and an `ELSE` branch is provided, the inner updating query of the `ELSE` branch is executed. +* If no given `WHEN` predicate evaluates to `true` and an `ELSE` branch is provided, the inner updating query of the `ELSE` branch is executed. * If no given `WHEN` predicates evaluates to `true` and no `ELSE` branch is provided, no updates will be executed. -A query may end with a conditional `DO` subquery in the same way that a query can currently end with any update clause. +A query may end with a conditional `DO` subquery in the same way that a query can currently end with an update clause. === Chained subqueries +Chained subqueries are queries that compose two (argument) subqueries using a _query combinator_ clause. + +While (full) top-level queries are arbitrary Cypher queries, argument queries are queries that may not contain query combinators. +They may however contain nested subqueries which may use query combinators in their inner queries (which again may be top-level queries). + +Currently Cypher only supports the `UNION` and `UNION ALL` query combinators. +This CIP proposes to extend this set of query combinators with new forms as outlined below. + ==== Chained data-dependent subqueries -We propose extending the `WITH` projection clause to sequentially compose arbitrary queries to form a chained data-dependent subquery without resorting to nesting and indentation (e.g. as a short-hand syntax for post-UNION processing). 
+We propose the introduction of using the `WITH` projection clause as a new query combinator that can sequentially compose arbitrary queries to form a chained data-dependent subquery without having to resort to nesting and indentation (e.g. as a short-hand syntax for post-UNION processing). Chained data-dependent subqueries have the following general form ` WITH ... `. -Both ` and `` are arbitrary, complete Cypher queries. +Both ` and `` are arbitrary argument queries. Conceptually, the query `` is evaluated for each incoming input record from the query `` and may produce an arbitrary number of result records. In other words, the query `` will be provided with all variables returned by the query `` as input variable bindings. -Furthermore, this CIP proposes allowing a leading `WITH` to project variables from expressions that refer to unbound variables from the preceding scope (or query). +Furthermore, this CIP proposes allowing a leading `WITH` to project new variables from expressions that refer to unbound variables from the preceding scope (or query). This set of referenced, unbound variables of such a leading `WITH` is understood to implicitly declare the input variables required for the query to execute. Note:: This mechanism allows composing a Cypher query with inputs that have been constructed programmatically. ==== Chained data-independent subqueries -We propose introducing the `THEN` projection clause to sequentially compose two arbitrary subqueries to form a chained data-independent subquery without resorting to nesting and indentation. +We propose introducing the `THEN` projection clause as a new query combinator that can sequentially compose two arbitrary subqueries to form a chained data-independent subquery without having to resort to nesting and indentation. Chained data-independent subqueries have the following general form ` THEN `. -Both ` and `` are arbitrary, complete Cypher queries. +Both ` and `` are arbitrary argument queries. 
No variables and no input records are passed from `` to ``. Instead `` is executed in a standalone fashion after the execution of `` has finished. Furthermore, this CIP proposes allowing queries to start with a leading `THEN` for discarding all variables in scope as well as the cardinality of all input records provided by the surrounding execution environment. -Note:: This mechanism allows guaranteed execution of `` irrespective of the number of records produced by ``. +Note:: This mechanism allows guaranteed execution of a (usually updating) query `` irrespective of the number of records produced by query ``. Note:: In general, `` is expected to be an updating query and it is recommended that implementations generate a warning if this is not the case (to inform the user that `` is essentially superfluous). -==== Precedence of subquery chaining - -This CIP proposes to distinguish between full (top-level) queries and argument queries. - -A full top-level query is any Cypher query. -In particular, a full top-level query may be formed by composing argument queries using subquery chaining or other query combinators (like `UNION`). - -An argument query is a sequence of clauses that ends in either an updating clause or `RETURN`. -An argument query may not contain subquery chaining or another query combinator. - -These rules establish left-associative precedence of subquery chaining and other query combinators over argument queries. +==== Query combinator precedence -In other words, ` UNION WITH ` is always interpreted as `( UNION ) WITH ` but never as ` UNION ( WITH )` (The same rule applies to `THEN`). +This CIP proposes that all Cypher query combinators are left-associative regarding their argument queries. -Note:: Subquery chaining and other query combinators may still be used from inside an argument query via inner nested subqueries.
+Note:: In other words, ` UNION WITH ` is always interpreted as `( UNION ) WITH ` but never as ` UNION ( WITH )` (The same rule applies to `THEN`). ==== Discarding variables in scope From 0156bc390c9c0ed27a1ca6bd317581125619cc20 Mon Sep 17 00:00:00 2001 From: Stefan Plantikow Date: Mon, 16 Oct 2017 22:16:59 +0200 Subject: [PATCH 19/27] Fix definition of chained queries and move to right directory --- ...2016-06-22-nested-updating-and-chained-subqueries.adoc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) rename cip/{ => 1.accepted}/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc (96%) diff --git a/cip/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc b/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc similarity index 96% rename from cip/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc rename to cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc index c4f7da61ee..50c6294dd7 100644 --- a/cip/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc +++ b/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc @@ -138,10 +138,10 @@ A query may end with a conditional `DO` subquery in the same way that a query ca === Chained subqueries -Chained subqueries are queries that compose two (argument) subqueries using a _query combinator_ clause. +Chained subqueries are queries that compose a top-level result query using a _query combinator_ clause from two input queries: a left-hand side top-level query and a right-hand side argument query. -While (full) top-level queries are arbitrary Cypher queries, argument queries are queries that may not contain query combinators. -They may however contain nested subqueries which may use query combinators in their inner queries (which again may be top-level queries). +In this definition, top-level queries are arbitrary Cypher queries, while argument queries are queries that may *not* contain query combinators. 
+Argument queries may however contain nested subqueries whose inner queries again may be top-level queries (that may very well contain query combinators). Currently Cypher only supports the `UNION` and `UNION ALL` query combinators. This CIP proposes to extend this set of query combinators with new forms as outlined below. @@ -180,7 +180,7 @@ Note:: In general, `` is expected to be an updating query and it is recommen ==== Query combinator precedence -This CIP proposes that all Cypher query combinators are left-associative regarding their argument queries. +This CIP proposes that all Cypher query combinators are left-associative regarding their left-hand side and right-hand side input queries. Note:: In other words, ` UNION WITH ` is always interpreted as `( UNION ) WITH ` but never as ` UNION ( WITH )` (The same rule applies to `THEN`). From acfac59cd7e70927e3bcabda2613a3f60f841396 Mon Sep 17 00:00:00 2001 From: Petra Selmer Date: Tue, 17 Oct 2017 13:29:59 +0100 Subject: [PATCH 20/27] Textual edits --- ...ested-updating-and-chained-subqueries.adoc | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc b/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc index 50c6294dd7..469da9f1ce 100644 --- a/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc +++ b/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc @@ -31,7 +31,7 @@ This CIP may be viewed in light of CIPs for query combinators and set operations == Proposal -Subqueries are self-contained Cypher queries that are usually run within the scope of an outer, containing Cypher query. +Subqueries are self-contained Cypher queries that are usually run within the scope of an outer Cypher query. This proposal suggests the introduction of new subquery constructs to Cypher. 
@@ -40,11 +40,11 @@ This proposal suggests the introduction of new subquery constructs to Cypher. ** Read-only nested optional subqueries of the form `OPTIONAL MATCH { }` ** Read-only nested mandatory subqueries of the form `MANDATORY MATCH { }` * Read/Write updating subqueries -** Read/Write simple updating subqueries of the form `DO { }` (inner query not ending with `RETURN`) -** Read/Write conditionally-updating subqueries of the form `DO [WHEN THEN { }]+ [ELSE { }] END` (inner queries not ending with `RETURN`) +** Read/Write simple updating subqueries of the form `DO { }` (the inner query not ending with `RETURN`) +** Read/Write conditionally-updating subqueries of the form `DO [WHEN THEN { }]+ [ELSE { }] END` (the inner queries not ending with `RETURN`) * Chained subqueries -** Chained data-dependent subqueries that extend the `WITH` projection clause with the form ` `. Additionally, this CIP proposes new shorthand syntax for starting a query with `WITH` to compose a query with external inputs. -** Chained data-independent subqueries based on the newly introduced `THEN` clause for discarding all variables in scope as well as the cardinality of all input records. Additionally, this CIP proposes new shorthand syntax for discarding all variables in scope without discarding the cardinality of input records using `WITH|RETURN|YIELD NOTHING`. +** Chained _data-dependent_ subqueries that extend the `WITH` projection clause with the form ` `. Additionally, this CIP proposes new shorthand syntax for starting a query with `WITH` to compose a query with external inputs. +** Chained _data-independent_ subqueries based on the newly-introduced `THEN` clause for discarding all variables in scope as well as the cardinality of all input records. Additionally, this CIP proposes new shorthand syntax for discarding all variables in scope without discarding the cardinality of input records using `WITH|RETURN|YIELD NOTHING`. 
We additionally propose removing the `FOREACH` clause from the current language (it is rendered obsolete by the introduction of `DO`). @@ -59,7 +59,9 @@ Read/Write subqueries cannot be contained within other read-only subqueries. === Read-only nested subqueries -Conceptually, a nested subquery is evaluated for each incoming input record and may produce an arbitrary number of output records. +A nested subquery is evaluated for each incoming input record and may produce an arbitrary number of output records. + +In all instances below, `` denotes any complete, read-only Cypher query. ==== Read-only nested regular subqueries @@ -67,8 +69,6 @@ We propose the addition of read-only nested regular subqueries as a new form of A nested read-only simple subquery is denoted using the following syntax: `MATCH { }`. -The inner query can be any complete read-only Cypher query. - ==== Read-only nested optional subqueries We propose extending the `OPTIONAL MATCH` clause to express read-only nested optional subqueries. @@ -89,15 +89,15 @@ Each record returned by the final `RETURN` clause of the subquery augments the v This may shadow existing bindings of the initial input record. No other variable bindings are added to output records. -Note:: It is recommended that implementations generate a warning if an incoming variable is both discarded within the inner query while the same inner also query returns a (potentially different) value for that variable to the outer query, i.e. is shadowing (replacing) it. +Note:: It is recommended that implementations generate a warning if an incoming variable is both discarded within the inner query while the same inner query also returns a (potentially different) value for that variable to the outer query, i.e. is shadowing (replacing) it. Finally, the result records of the different forms of nested subqueries are formed as follows: * The result records of a read-only regular subquery are just the output records. 
-* The result records of a read-only optional subquery are all the output records (if there is at least one output record), or a single record with the same fields as the output records where all newly introduced variable bindings are set to `NULL`. +* The result records of a read-only optional subquery are all the output records (if there is at least one output record), or a single record with the same fields as the output records where all newly-introduced variable bindings are set to `NULL`. * The result records of a read-only mandatory subquery are just the output records. However, if the set of output records is empty, an error is raised in the same way as regular `MANDATORY MATCH` raises an error when no matches are found. -Nested subqueries interact with write clauses in the same way as `MATCH` does. +Nested subqueries interact with write clauses in the same way that `MATCH` does. === Read/Write updating subqueries @@ -129,7 +129,7 @@ END Evaluation proceeds as follows: -* Semantically, the `WHEN` predicates are tested in the order given, and the inner updating query is executed for only the first predicate that evaluates to `true`. +* Semantically, the `WHEN` predicates are tested in the order given, and the inner updating query is executed only for the first predicate that evaluates to `true`. * If no given `WHEN` predicate evaluates to `true` and an `ELSE` branch is provided, the inner updating query of the `ELSE` branch is executed. * If no given `WHEN` predicates evaluates to `true` and no `ELSE` branch is provided, no updates will be executed. @@ -138,7 +138,7 @@ A query may end with a conditional `DO` subquery in the same way that a query ca === Chained subqueries -Chained subqueries are queries that compose a top-level result query using a _query combinator_ clause from two input queries: a left-hand side top-level query and a right-hand side argument query. 
+_Chained_ subqueries are queries that compose a top-level result query using a _query combinator_ clause from two input queries: a left-hand side 'top-level' query and a right-hand side argument query. In this definition, top-level queries are arbitrary Cypher queries, while argument queries are queries that may *not* contain query combinators. Argument queries may however contain nested subqueries whose inner queries again may be top-level queries (that may very well contain query combinators). From 7554cc9685217205912a59e3e680706336fd350d Mon Sep 17 00:00:00 2001 From: Stefan Plantikow Date: Tue, 17 Oct 2017 15:20:31 +0200 Subject: [PATCH 21/27] Clarified query combinator semantics --- ...-06-22-nested-updating-and-chained-subqueries.adoc | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc b/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc index 469da9f1ce..d5b8155ad4 100644 --- a/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc +++ b/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc @@ -138,21 +138,22 @@ A query may end with a conditional `DO` subquery in the same way that a query ca === Chained subqueries -_Chained_ subqueries are queries that compose a top-level result query using a _query combinator_ clause from two input queries: a left-hand side 'top-level' query and a right-hand side argument query. +Chained subqueries are queries that compose a top-level result query using a sequence of _query combinator_ clauses each followed by a query component. -In this definition, top-level queries are arbitrary Cypher queries, while argument queries are queries that may *not* contain query combinators. -Argument queries may however contain nested subqueries whose inner queries again may be top-level queries (that may very well contain query combinators). 
+A query component is a sequence of clauses that describes either an updating query or a read-only query that ends in a `RETURN` clause but does not contain any top-level query combinator clauses. +Query components may however contain nested subqueries whose inner queries contain query combinator clauses. Currently Cypher only supports the `UNION` and `UNION ALL` query combinators. This CIP proposes to extend this set of query combinators with new forms as outlined below. + ==== Chained data-dependent subqueries We propose the introduction of using the `WITH` projection clause as a new query combinator that can sequentially compose arbitrary queries to form a chained data-dependent subquery without having to resort to nesting and indentation (e.g. as a short-hand syntax for post-UNION processing). Chained data-dependent subqueries have the following general form ` WITH ... `. -Both ` and `` are arbitrary argument queries. +Both ` and `` are arbitrary query components. Conceptually, the query `` is evaluated for each incoming input record from the query `` and may produce an arbitrary number of result records. In other words, the query `` will be provided with all variables returned by the query `` as input variable bindings. @@ -168,7 +169,7 @@ We propose introducing the `THEN` projection clause as a new query combinator th Chained data-independent subqueries have the following general form ` THEN `. -Both ` and `` are arbitrary argument queries. +Both ` and `` are query components. No variables and no input records are passed from `` to ``. Instead `` is executed in a standalone fashion after the execution of `` has finished.
From 10fa1823e443f75274d317dd70cc2de20da0b593 Mon Sep 17 00:00:00 2001 From: Petra Selmer Date: Thu, 19 Oct 2017 18:21:15 +0100 Subject: [PATCH 22/27] Fixed erroneous queries --- ...IP2016-06-22-nested-updating-and-chained-subqueries.adoc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc b/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc index d5b8155ad4..70855decaf 100644 --- a/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc +++ b/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc @@ -225,7 +225,7 @@ MATCH { MATCH (u:User {id: $userId})-[:LIKES]->(b:Brand), (b)-[:PRODUCES]->(v:Vehicle), (v)<-[:IS_A]-(:Category {name: 'Tractor'}) - RETURN b.name AS name, p.code AS code + RETURN b.name AS name, v.code AS code } RETURN f, name, code ---- @@ -243,7 +243,7 @@ MATCH { (b)-[:PRODUCES]->(v:Vehicle), (v)<-[:IS_A]-(:Category {name: 'Tractor'}) WHERE v.leftHandDrive = country.leftHandDrive - RETURN b.name AS name, p.code AS code + RETURN b.name AS name, v.code AS code } RETURN f, name, code ---- @@ -262,7 +262,7 @@ MATCH { (b)-[:PRODUCES]->(v:Vehicle), (v)<-[:IS_A]-(:Category {name: 'Tractor'}) WHERE v.leftHandDrive = country.leftHandDrive - RETURN b AS brand, p.code AS code + RETURN b AS brand, v.code AS code } WHERE f.type = 'organic' AND b.certified From 1ca70bf8e3cea65ee47ce49eeabea83530eb529b Mon Sep 17 00:00:00 2001 From: Petra Selmer Date: Wed, 17 Jan 2018 16:33:30 +0000 Subject: [PATCH 23/27] Reformatted title --- .../CIP2016-06-22-nested-updating-and-chained-subqueries.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc b/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc index 70855decaf..4b7f3d0b75 100644 --- a/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc +++ 
b/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc @@ -1,4 +1,4 @@ -= CIP2016-06-22 - Nested, updating, and chained subqueries += CIP2016-06-22 Nested, updating, and chained subqueries :numbered: :toc: :toc-placement: macro From cfc2a43cd4a11fc0f2b5dc85d78e1d667893434a Mon Sep 17 00:00:00 2001 From: Stefan Plantikow Date: Sun, 6 May 2018 18:13:24 +0200 Subject: [PATCH 24/27] Reworking/incorporating alternative CIP --- ...ested-updating-and-chained-subqueries.adoc | 165 +++++++++++++++--- 1 file changed, 144 insertions(+), 21 deletions(-) diff --git a/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc b/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc index 4b7f3d0b75..5b252b4688 100644 --- a/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc +++ b/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc @@ -9,13 +9,20 @@ [abstract] .Abstract -- -This CIP proposes the incorporation of nested, updating, and chained subqueries into Cypher. +Cypher currently has no support for nested subqueries. +This is limiting as it prevents e.g. post-processing of union results or changing the working graph via a subquery. +This CIP proposes to add support for nested subqueries and composite statements to Cypher. +Nested subqueries may be uncorrelated (take no input records), correlated (take input records), produce tables, graphs, or have side-effects (i.e. perform updates). -- toc::[] -== Motivation + +== Introduction + + +=== Motivation Subqueries - i.e. queries within queries - are a powerful and expressive feature allowing for: @@ -25,9 +32,23 @@ Subqueries - i.e. 
queries within queries - are a powerful and expressive feature * Post-processing results from multiple queries as a single unit * Performing a sequence of multiple write commands for each record -== Background -This CIP may be viewed in light of CIPs for query combinators and set operations, `EXISTS`, scalar subqueries, and list subqueries. +=== Background + +This CIP has been created in tandem with CIP2017-06-18 for adding support for working with multiple graphs to Cypher and relies on the terminology for describing the high-level structure of queries introduced in CIP2017-06-18. +Therefore this proposal is based on the assumption that CIP2017-06-18 will be accepted. + +This CIP should also be viewed in light of CIPs for set operations, `EXISTS`, scalar subqueries, and list subqueries. + +=== Design goals + +This proposal follows these design goals: + +1. Ensure that subqueries have the exact same capabilities in terms of consumed inputs, produced outputs, and potential side-effects as regular standalone queries. + +2. Ensure that every subquery is a syntactically valid standalone query independent of which variables are provided by the calling context. + + == Proposal @@ -35,31 +56,133 @@ Subqueries are self-contained Cypher queries that are usually run within the sco This proposal suggests the introduction of new subquery constructs to Cypher. 
-* Read-only nested subqueries -** Read-only nested regular subqueries of the form `MATCH { }` -** Read-only nested optional subqueries of the form `OPTIONAL MATCH { }` -** Read-only nested mandatory subqueries of the form `MANDATORY MATCH { }` -* Read/Write updating subqueries -** Read/Write simple updating subqueries of the form `DO { }` (the inner query not ending with `RETURN`) -** Read/Write conditionally-updating subqueries of the form `DO [WHEN THEN { }]+ [ELSE { }] END` (the inner queries not ending with `RETURN`) -* Chained subqueries -** Chained _data-dependent_ subqueries that extend the `WITH` projection clause with the form ` `. Additionally, this CIP proposes new shorthand syntax for starting a query with `WITH` to compose a query with external inputs. -** Chained _data-independent_ subqueries based on the newly-introduced `THEN` clause for discarding all variables in scope as well as the cardinality of all input records. Additionally, this CIP proposes new shorthand syntax for discarding all variables in scope without discarding the cardinality of input records using `WITH|RETURN|YIELD NOTHING`. +* Nested tabular subqueries +** Nested tabular subqueries of the form `CALL { }` +** Optional nested tabular subqueries of the form `OPTIONAL CALL { }` +** Mandatory nested tabular subqueries of the form `MANDATORY CALL { }` +** Grouped nested tabular subqueries +* Nested graph subqueries +** Create graph subqueries of the form `CREATE GRAPH { }, { }, ...` +** Read graph subqueries of the form `FROM { }` +** Update graph subqueries of the form `UPDATE { }` +* Syntactic simplifications +** Composite statements +** Conditional nested subqueries -We additionally propose removing the `FOREACH` clause from the current language (it is rendered obsolete by the introduction of `DO`). +Both uncorrelated and correlated forms of nested subqueries are supported by this CIP. 
-Subquery constructs are always introduced with a keyword(s) in conjunction with an inner query in curly braces. +This proposal additionally suggests removing the `FOREACH` clause from the current language (it is rendered obsolete by the introduction of nested tabular subqueries). -Subqueries may be correlated - i.e. the inner query may use variables from the outer query - or uncorrelated. -Subqueries can be contained within other subqueries at an arbitrary (but finite) depth. +== Nested subqueries -Read/Write subqueries cannot be contained within other read-only subqueries. +Nested subqueries are always introduced with keywords that are followed by an inner query in curly braces. +_Definition_: A *nested subquery* is a composite statement that syntactically occurs as an argument to another clause that is enclosed in curly braces. -=== Read-only nested subqueries +Usage of nested subqueries must adhere to the following rules: + +1. Nested subqueries can be contained within other nested subqueries at an arbitrary (but finite) depth. +2. Nested subqueries that perform updates cannot be contained within nested subqueries in read-only contexts. +3. Nested subqueries are not allowed to contain schema commands. + +Note:: These restrictions capture current use of Cypher and may be reduced in the future. + +Nested subqueries may be correlated - i.e. the inner query may use variables from the outer query - or uncorrelated. + +_Definition_: A *correlated nested subquery* is a *nested subquery* that has at least one leading clause that is a `WITH` clause that references a variable from the preceding clauses. + +_Definition_: An *uncorrelated nested subquery* is a *nested subquery* that has no leading clause that is a `WITH` clause that references a variable from the preceding clauses. + +A composite statement that is used as a nested subquery may have multiple points of entry. 
+The following definition captures this concept of entry points into a subquery by using the terminology introduced in CIP2017-06-18: + +_Definition_: The *leading clauses* of a composite statement are the leading clauses of the first single statement. +The leading clauses of a single statement are the leading clauses of its constituents. +The leading clause of a simple clause chain is the first clause in the sequence of clauses unless that clause is a call to a nested subquery in which case the leading clauses of the simple clause chain will be taken to be the leading clauses of that nested subquery. +The leading clauses of an operator clause chain are the leading clauses of all simple clause chains that are connected directly by the operator clause of the operator clause chain. + + +=== Nested table subqueries + +A nested table subquery is evaluated for each incoming input record and may produce an arbitrary number of output records. + +_Definition_: A *nested table subquery* is a composite statement that returns a table. + +We extend `CALL` with a new syntactic form that allows a nested table subquery argument and may be used either in a stand-alone call or inside a simple clause chain. + +[source, cypher] +---- +-- preceding clauses +... +CALL { + -- nested table subquery + ... +} +-- remaining clauses +... +---- -A nested subquery is evaluated for each incoming input record and may produce an arbitrary number of output records. + +[#uncorrelated-subqueries] +==== Uncorrelated nested table subqueries + +Semantics: + +1. The nested table subquery is executed for each record produced by preceding clauses. +This record is called the *input record* in this context. +No variable bindings are made available to the nested subquery. +This rule is relaxed for <>. + +2. If the nested table subquery returns nothing (i.e. ends in an updating command), then all input records are passed on to the remaining clauses. + +3. 
If the nested table subquery returns tabular data, each input record produced by preceding clauses is combined with each record returned by calling the nested subquery for that input record to produce result records. +All such result records are passed on as input to the remaining query. + +4. An error is raised if the nested table subquery produces a tabular result that binds a variable that is already bound in the outer query. +This rule is relaxed for <>. + +5. Any change to the working graph during the execution of the nested table subquery is not visible to the remaining clauses. +In other words, the working graph is duplicated on the working graph stack when calling a nested table subquery and the working graph is removed from the working graph stack when consuming the result of calling a nested table subquery. + + +[#correlated-subqueries] +==== Correlated nested table subqueries + +Correlated nested table subqueries refer to variable bindings from preceding clauses. +Syntactically, this is achieved by using the `WITH` clause as a leading clause of the nested table subquery that declares required inputs in terms of available variables from preceding clauses. + +Semantics: + +1. All rules for <> apply for correlated nested table subqueries unless otherwise noted in this list. + +2. All variable bindings of the input record are made available to all leading clauses of the nested table subquery. + +3. The nested subquery may return variables already bound by preceding clauses if it can be shown via simple static analysis that these have just been passed through. +It is not required that this analysis takes into account aliasing inside the nested subquery. + + +=== Optional nested table subqueries and procedure calls + +A nested table subquery may be prefixed with the keyword `OPTIONAL`. 
+ +If calling the nested table subquery returns an empty result, this empty result is replaced with a table that consists of a single record that maps all variables that have been newly introduced by the nested table subquery to `NULL` and all variables that have been passed through by the nested table subquery to their value in the input record. + +An implementation may choose to support the same semantics for calling procedures using syntax like `OPTIONAL CALL myProc(...) YIELD ...`. + + +=== Mandatory nested table subqueries and procedure calls + +A nested table subquery may be prefixed with the keyword `MANDATORY`. + +If calling the nested table subquery returns an empty result, an error is raised. + +The same semantics are supported for calling procedures using syntax like `MANDATORY CALL myProc(...) YIELD ...`. + +// UP TO HERE + + +=== Read-only nested subqueries In all instances below, `` denotes any complete, read-only Cypher query. From 6199430279ffa801a769e07b63d4b8a59621d0b2 Mon Sep 17 00:00:00 2001 From: Stefan Plantikow Date: Mon, 7 May 2018 00:41:31 +0200 Subject: [PATCH 25/27] Fused with nested subqueries CIP from multigraph work --- ...ested-updating-and-chained-subqueries.adoc | 340 ++++++++++-------- 1 file changed, 182 insertions(+), 158 deletions(-) diff --git a/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc b/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc index 5b252b4688..5f6e11139e 100644 --- a/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc +++ b/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc @@ -56,25 +56,26 @@ Subqueries are self-contained Cypher queries that are usually run within the sco This proposal suggests the introduction of new subquery constructs to Cypher. 
-* Nested tabular subqueries -** Nested tabular subqueries of the form `CALL { }` -** Optional nested tabular subqueries of the form `OPTIONAL CALL { }` -** Mandatory nested tabular subqueries of the form `MANDATORY CALL { }` -** Grouped nested tabular subqueries -* Nested graph subqueries -** Create graph subqueries of the form `CREATE GRAPH { }, { }, ...` -** Read graph subqueries of the form `FROM { }` -** Update graph subqueries of the form `UPDATE { }` -* Syntactic simplifications -** Composite statements +* Nested subqueries +** Nested tabular subqueries +*** Nested tabular subqueries of the form `CALL { }` +*** Optional nested tabular subqueries of the form `OPTIONAL CALL { }` +*** Mandatory nested tabular subqueries of the form `MANDATORY CALL { }` +** Nested graph subqueries +*** Create graph subqueries of the form `CREATE GRAPH { }, GRAPH { }, ...` +*** Read graph subqueries of the form `FROM { }` +*** Update graph subqueries of the form `UPDATE { }` +** Grouped nested subqueries +* Syntax simplifications ** Conditional nested subqueries +** Composite statements Both uncorrelated and correlated forms of nested subqueries are supported by this CIP. This proposal additional suggests removing the `FOREACH` clause from the current language (it is rendered obsolete by the introduction of nested tabular subqueries). -== Nested subqueries +=== Nested subqueries Nested subqueries are always introduced with keywords that are followed by an inner query in curly braces. @@ -124,7 +125,7 @@ CALL { ---- -[#uncorrelated-subqueries] +[#uncorrelated-table-subqueries] ==== Uncorrelated nested table subqueries Semantics: @@ -132,7 +133,7 @@ Semantics: 1. The nested table subquery is executed for each record produced by preceding clauses. This record is called the *input record* in this context. No variable bindings are made available to the nested subquery. -This rule is relaxed for <>. +This rule is relaxed for <>. 2. If the nested table subquery returns nothing (i.e. 
ends in an updating command), then all input records are passed on to the remaining clauses. @@ -140,13 +141,15 @@ This rule is relaxed for <>. All such result records are passed on as input to the remaining query. 4. An error is raised if the nested table subquery produces a tabular result that binds a variable that is already bound in the outer query. -This rule is relaxed for <>. +This rule is relaxed for <>. 5. Any change to the working graph during the execution of the nested table subquery is not visible to the remaining clauses. In other words, the working graph is duplicated on the working graph stack when calling a nested table subquery and the working graph is removed from the working graph stack when consuming the result of calling a nested table subquery. +6. An error is raised if a non-standalone `CALL` is provided with a subquery that does not return a table. -[#correlated-subqueries] + +[#correlated-table-subqueries] ==== Correlated nested table subqueries Correlated nested table subqueries refer to variable bindings from preceding clauses. @@ -154,7 +157,7 @@ Syntactically, this is achieved by using the `WITH` clause as a leading clause o Semantics: -1. All rules for <> apply for correlated nested table subqueries unless otherwise noted in this list. +1. All rules for <> apply for correlated nested table subqueries unless otherwise noted in this list. 2. All variable bindings of the input record are made available to all leading clauses of the nested table subquery. @@ -162,7 +165,8 @@ Semantics: It is not required that this analysis takes into account aliasing inside the nested subquery. -=== Optional nested table subqueries and procedure calls +[#optional-table-subqueries] +==== Optional nested table subqueries and procedure calls A nested table subquery may be prefixed with the keyword `OPTIONAL`. 
@@ -171,7 +175,8 @@ If calling the nested table subquery returns an empty result, this empty result An implementation may choose to support the same semantics for calling procedures using syntax like `OPTIONAL CALL myProc(...) YIELD ...`. -=== Mandatory nested table subqueries and procedure calls +[#mandatory-table-subqueries] +==== Mandatory nested table subqueries and procedure calls A nested table subquery may be prefixed with the keyword `MANDATORY`. @@ -179,147 +184,141 @@ If calling the nested table subquery returns an empty result, an error is raised The same semantics are supported for calling procedures using syntax like `MANDATORY CALL myProc(...) YIELD ...`. -// UP TO HERE - - -=== Read-only nested subqueries - -In all instances below, `` denotes any complete, read-only Cypher query. - -==== Read-only nested regular subqueries -We propose the addition of read-only nested regular subqueries as a new form of read-only Cypher query. +=== Nested graph subqueries and procedure calls -A nested read-only simple subquery is denoted using the following syntax: `MATCH { }`. +_Definition_: A nested graph subquery is a nested subquery that returns a graph. -==== Read-only nested optional subqueries +Nested graph subqueries may be used in the following forms: -We propose extending the `OPTIONAL MATCH` clause to express read-only nested optional subqueries. + * `CREATE GRAPH name { AS , ...` will create one or more graphs in the catalog by making a copy of the graph returned by the subquery. + These graphs are logically created in parallel and thus cannot interact with each other. + * `[OPTIONAL|MANDATORY] FROM { } | ` will change the working graph for further read operations without affecting the current variable bindings and the cardinality of records available to following clauses. 
+ * `[OPTIONAL|MANDATORY] UPDATE { } | ` will change the working graph for further updating operations without affecting the current variable bindings and the cardinality of records available to following clauses. -A read-only nested optional subquery is denoted by the following syntax: `OPTIONAL MATCH { }`. +Note:: The subquery form of `CALL` may not return a graph as there would be no indication regarding the allowed operations for further processing (reading, updating, ...). -==== Read-only nested mandatory subqueries +Note:: The stand-alone form of `CALL` may produce a graph result. -We propose extending the `MANDATORY MATCH` clause to express read-only nested mandatory subqueries. +Semantics: -A read-only nested mandatory subquery is denoted by the following syntax: `MANDATORY MATCH { }`. +1. Nested graph subqueries are provided with tabular input in the same way as nested table subqueries. -==== Semantics +2. Correlated nested graph subqueries will change the working graph for every input record. -The nested subquery will be provided with all variables visible in the outer query as input. +3. A `MANDATORY` nested graph subquery raises an error if the provided graph argument is an empty graph. -Each record returned by the final `RETURN` clause of the subquery augments the variable bindings of the initial input record from the outer query to form an output record of the subquery. -This may shadow existing bindings of the initial input record. -No other variable bindings are added to output records. +4. An `OPTIONAL` nested graph subquery does not change the working graph if the provided graph argument is an empty graph. -Note:: It is recommended that implementations generate a warning if an incoming variable is both discarded within the inner query while the same inner query also returns a (potentially different) value for that variable to the outer query, i.e. is shadowing (replacing) it. 
-Finally, the result records of the different forms of nested subqueries are formed as follows: -* The result records of a read-only regular subquery are just the output records. -* The result records of a read-only optional subquery are all the output records (if there is at least one output record), or a single record with the same fields as the output records where all newly-introduced variable bindings are set to `NULL`. -* The result records of a read-only mandatory subquery are just the output records. However, if the set of output records is empty, an error is raised in the same way as regular `MANDATORY MATCH` raises an error when no matches are found. +=== Grouped nested subqueries -Nested subqueries interact with write clauses in the same way that `MATCH` does. +Correlated nested subqueries are by default called for each input record. +Grouped nested subqueries instead execute the nested subquery for all input records that share the same grouping key. +Grouped subqueries optionally may compute additional variable bindings or query parameters in terms of the grouping key using the established syntax for return items (` AS `, ` AS $`). +Syntactically, the grouping key may be specified by prefixing a nested subquery with a leading `PER` sub-clause that specifies the components of the grouping key and may optionally bind new parameters. +Syntax: -=== Read/Write updating subqueries +[source, cypher] +---- +CALL PER ... { ... } +FROM PER ... { ... } +UPDATE PER ... { ... } +---- -Updating subqueries never change the cardinality; i.e. the inner update query is run for each incoming input record. +Semantics: -==== Read/Write simple updating subqueries +1. The grouping key declaration binds new variables and parameters by evaluating arbitrary expressions over all variable bindings in scope. -We propose the addition of a new `DO` clause for expressing read/write simple updating subqueries that _do not return any data_ from the inner query to the outer query. +2. 
The grouping key declaration may shadow an already bound parameter or variable inside the nested subquery. -A read/write simple updating subquery is denoted by the following syntax: `DO { }`. +3. Introduced parameters and variables are only visible inside the nested subquery. -Any updating Cypher query from which the trailing final `RETURN` clause has been omitted may be used as an inner update query. -A query may end with a `DO` subquery in the same way that a query can currently end with any update clause. -==== Read/Write conditionally-updating subqueries +=== Conditional nested subqueries -We propose the addition of a new conditional `DO` clause for expressing read/write conditionally-updating subqueries that _do not return any data_ from the inner query to the outer query. +Correlated nested subqueries may start with a `WHERE ...` clause as a short hand for `WITH * WHERE ...`. -A read/write conditionally-updating subquery is denoted by the following syntax: +This CIP proposes the introduction of the `OTHERWISE` operator clause: -``` -DO - [WHEN THEN ]+ - [ELSE ] -END -``` +1. ` OTHERWISE OTHERWISE ... ` either combines read-only simple clause chains or updating simple clause chains but raises an error when used to combine both read-only and updating simple clause chains. -Evaluation proceeds as follows: +2. ` OTHERWISE OTHERWISE ... ` raises an error if any two simple clause chains do not either both return a graph or a table with the same fields. -* Semantically, the `WHEN` predicates are tested in the order given, and the inner updating query is executed only for the first predicate that evaluates to `true`. -* If no given `WHEN` predicate evaluates to `true` and an `ELSE` branch is provided, the inner updating query of the `ELSE` branch is executed. -* If no given `WHEN` predicates evaluates to `true` and no `ELSE` branch is provided, no updates will be executed. +3. If ` OTHERWISE OTHERWISE ... 
` is used to combine read-only simple clause chains, it evaluates to the first `` that either returns a non-empty table or a non-empty graph and to `` otherwise. -A query may end with a conditional `DO` subquery in the same way that a query can currently end with an update clause. +4. If ` OTHERWISE OTHERWISE ... ` is used to combine updating simple clause chains, it evaluates to the first `` that performs a side-effect and to `` otherwise. -=== Chained subqueries +=== Composite statements -Chained subqueries are queries that compose a top-level result query using a sequence of _query combinator_ clauses each followed by a query component. +Simple statements are either simple clause chains or operator clause chains (this is defined in `CIP2017-06-18`). -A query component is a sequence of clauses that either describes an updating query or a read-only that ends in a `RETURN` clause but does not contain any top-level query combinator clauses. -Query components may however contain nested subqueries whose inner queries contain query combinator clauses. +Composite statements allow sequencing simple statements using the `THEN` clause. +The `THEN` clause may be omitted if the preceding clause is a `RETURN` or `RETURN GRAPH` clause. +This is called composition using vertical juxtaposition. -Currently Cypher only supports the `UNION` and `UNION ALL` query combinators. -This CIP proposes to extend this set of query combinators with new forms as outlined below. +=== Discarding variables in scope -==== Chained data-dependent subqueries +Finally, this CIP proposes new shorthand syntax for discarding all variables in scope without discarding the cardinality of input records using `WITH|RETURN|YIELD NOTHING`. -We propose the introduction of using the `WITH` projection clause as a new query combinator that can sequentially compose arbitrary queries to form a chained data-dependent subquery without having to resort to nesting and indentation (e.g. 
as a short-hand syntax for post-UNION processing). -Chained data-dependent subqueries have the following general form ` WITH ... `. +== Grammar -Both ` and `` are arbitrary query components. +The following grammar shows the main syntax of all proposed changes: +[source, cypher] -Conceptually, the query `` is evaluated for each incoming input record from the query `` and may produce an arbitrary number of result records. -In other words, the query `` will be provided with all variables returned by the query `` as input variable bindings. +---- + ::= < simple statement > [ { `THEN` < simple statement > } ... ] ; -Furthermore, this CIP proposes allowing a leading `WITH` to project new variables from expressions that refer to unbound variables from the preceding scope (or query). -This set of referenced, unbound variables of such a leading `WITH` is understood to implicitly declare the input variables required for the query to execute. + ::= < query-mode > CALL < query-group > < subquery > + | < query-mode > CALL < query-group > < invocation > + ; -Note:: This mechanism allows composing a Cypher query with inputs that have been constructed programmatically. + ::= < query-mode > FROM [ PER * | < keys > ] < subquery > ; -==== Chained data-independent subqueries + ::= < query-mode > UPDATE < query-group > < subquery > ; -We propose introducing the `THEN` projection clause as a new query combinator that can sequentially compose two arbitrary subqueries to form a chained data-independent subquery without having to resort to nesting and indentation. + ::= CREATE GRAPH < graph > [ { `,` GRAPH < graph > } ... ] ; -Chained data-independent subqueries have the following general form ` THEN `. + ::= < identifier > < subquery > + | < invocation > AS < identifier > + | < identifier > + ; -Both ` and `` are query components. -No variables and no input records are passed from `` to ``. -Instead `` is executed in a standalone fashion after the execution of `` has finished. 
+ ::= [ OPTIONAL | MANDATORY ] ; -Furthermore, this CIP proposes allowing queries to start with a leading `THEN` for discarding all variables in scope as well as the cardinality of all input records provided by the surrounding execution environment. + ::= [ PER * | < keys > ] ; -Note:: This mechanism allows guaranteed execution of an (usually updating) query `` irrespective of the number of records produced by query ``. + ::= `{` < composite statement > `}` + | `{` WHERE < predicate > < composite statement > `}` + ; -Note:: In general, `` is expected to be an updating query and it is recommended that implementations generate a warning if this is not the case (to inform the user that `` is essentially superfluous). + ::= < identifier > `(` < args > `)` [ YIELD * | < bindings > ] ; -==== Query combinator precedence + ::= < expr > [ { `,` < expr> } ... ] ; -This CIP proposes that all Cypher query combinators are left-associative regarding their left-hand side and right-hand side input queries. + ::= < key > [ { `,` < key > } ... ] ; + ::= < expr > [ AS [ `$` ] < identifier > ] ; -Note:: In other words, ` UNION WITH ` is always interpreted as `( UNION ) WITH ` but never as ` UNION ( WITH )` (The same rule applies to `THEN`). + ::= < item > [ { `,` < item > } ... ] ; + ::= < identifier > [ AS < identifier > ] ; +---- -==== Discarding variables in scope -Finally, this CIP proposes new shorthand syntax for discarding all variables in scope without discarding the cardinality of input records using `WITH|RETURN|YIELD NOTHING`. 
+== Examples -=== Examples -==== Read-only nested regular subqueries +=== Read-only nested table subqueries Post-UNION processing: [source, cypher] ---- -MATCH { +CALL { // authored tweets MATCH (me:User {name: 'Alice'})-[:FOLLOWS]->(user:User), (user)<-[:AUTHORED]-(tweet:Tweet) @@ -336,11 +335,11 @@ ORDER BY time DESC LIMIT 10 ---- -Uncorrelated nested subquery: +Uncorrelated nested table subquery: [source, cypher] ---- MATCH (f:Farm {id: $farmId}) -MATCH { +CALL { MATCH (u:User {id: $userId})-[:LIKES]->(b:Brand), (b)-[:PRODUCES]->(p:Lawnmower) RETURN b.name AS name, p.code AS code @@ -353,11 +352,12 @@ MATCH { RETURN f, name, code ---- -Correlated nested subquery: +Correlated nested table subquery: [source, cypher] ---- MATCH (f:Farm {id: $farmId})-[:IS_IN]->(country:Country) -MATCH { +CALL { + WITH country MATCH (u:User {id: $userId})-[:LIKES]->(b:Brand), (b)-[:PRODUCES]->(p:Lawnmower) RETURN b.name AS name, p.code AS code @@ -376,11 +376,12 @@ Filtered and correlated nested subquery: ---- MATCH (f:Farm)-[:IS_IN]->(country:Country) WHERE country.name IN $countryNames -MATCH { +CALL { MATCH (u:User {id: $userId})-[:LIKES]->(b:Brand), (b)-[:PRODUCES]->(p:Lawnmower) RETURN b AS brand, p.code AS code UNION + WITH country MATCH (u:User {id: $userId})-[:LIKES]->(b:Brand), (b)-[:PRODUCES]->(v:Vehicle), (v)<-[:IS_A]-(:Category {name: 'Tractor'}) @@ -392,58 +393,64 @@ WHERE f.type = 'organic' RETURN f, brand.name AS name, code ---- -Doubly-nested subquery: +Doubly-nested table subquery: [source, cypher] ---- MATCH (f:Farm {id: $farmId}) -MATCH { +CALL { + WITH f MATCH (c:Customer)-[:BUYS_FOOD_AT]->(f) - MATCH { - MATCH (c)-[:RETWEETS]->(t:Tweet)<-[:TWEETED_BY]-(f) - RETURN c, count(*) AS count - UNION - MATCH (c)-[:LIKES]->(p:Posting)<-[:POSTED_BY]-(f) - RETURN c, count(*) AS count + CALL { + WITH c, f + MATCH (c)-[:RETWEETS]->(t:Tweet)<-[:TWEETED_BY]-(f) + RETURN c, count(*) AS count + UNION + MATCH (c)-[:LIKES]->(p:Posting)<-[:POSTED_BY]-(f) + RETURN c, count(*) AS 
count } - RETURN c, 'customer' AS type, sum(count) AS endorsement + RETURN 'customer' AS type, sum(count) AS endorsement UNION + WITH f MATCH (s:Shop)-[:BUYS_FOOD_AT]->(f) MATCH (s)-[:PLACES]->(a:Advertisement)-[:ABOUT]->(f) - RETURN s, 'shop' AS type, count(a) * 100 AS endorsement + RETURN 'shop' AS type, count(a) * 100 AS endorsement } RETURN f.name AS name, type, sum(endorsement) AS endorsement ---- -===== Read-only nested optional and mandatory subqueries -This proposal also provides nested subquery forms of `OPTIONAL MATCH` and `MANDATORY MATCH`: +=== Read-only nested optional and mandatory table subqueries + +This proposal also provides nested table subquery forms of `OPTIONAL MATCH` and `MANDATORY MATCH`: [source, cypher] ---- MANDATORY MATCH (p:Person {name: 'Petra'}) MANDATORY MATCH (conf:Conference {name: $conf}) -MANDATORY MATCH { - WITH * WHERE conf.impact > 5 - MATCH (p)-[:ATTENDS]->(conf) - RETURN conf - UNION - MATCH (p)-[:LIVES_IN]->(:City)<-[:IN]-(conf) - RETURN conf +MANDATORY CALL { + WHERE conf.impact > 5 + MATCH (p)-[:ATTENDS]->(conf) + RETURN conf + UNION + MATCH (p)-[:LIVES_IN]->(:City)<-[:IN]-(conf) + RETURN conf } -OPTIONAL MATCH { - MATCH (p)-[:KNOWS]->(a:Attendee)-[:PUBLISHED_AT]->(conf) - RETURN a.name AS name - UNION - MATCH (p)-[:KNOWS]->(a:Attendee)-[:PRESENTED_AT]->(conf) - RETURN a.name AS name +OPTIONAL CALL { + WITH * + MATCH (p)-[:KNOWS]->(a:Attendee)-[:PUBLISHED_AT]->(conf) + RETURN a.name AS name + UNION + WITH * + MATCH (p)-[:KNOWS]->(a:Attendee)-[:PRESENTED_AT]->(conf) + RETURN a.name AS name } RETURN name ---- -==== Read/Write simple updating and conditionally-updating subqueries +=== Updating nested table subqueries -We illustrate these by means of an 'old' version of the query, in which `FOREACH` is used, followed by the 'new' version, using `DO`. +We illustrate these by means of an 'old' version of the query, in which `FOREACH` is used, followed by the 'new' version, using `CALL`. 
Using a single subquery - old version using `FOREACH`: [source, cypher] @@ -455,12 +462,13 @@ FOREACH(x IN range(1, 10) | ) ---- -Using a single subquery - new version using `DO`: +Using a single subquery - new version using `CALL`: [source, cypher] ---- MATCH (r:Root) UNWIND range(1, 10) AS x -DO { +CALL { + WITH * MERGE (c:Child {id: x}) MERGE (r)-[:PARENT]->(c) } @@ -468,12 +476,12 @@ DO { Note how `FOREACH` is addressing two semantic concerns simultaneously; namely looping, and performing updates without affecting the cardinality of the outer query. In the new version of the query shown above, these orthogonal concerns have been separated. -Looping is already handled by `UNWIND`, while `DO` suppresses the increased cardinality from the inner query. +Looping is already handled by `UNWIND`, while `CALL` just activates the inner query to perform the updates without increasing the cardinality. +Note that no new variable bindings are introduced by the inner query since it ends in an updating clause. -`DO` also hides all new variable bindings introduced by the inner query from the outer query. -If `DO` is omitted from the new version of the query shown above, the variable `c` would become visible to the remainder of the query. +Let's look at a double-nested variation. 
+First let's consider an old version using `FOREACH`: -Doubly-nested subquery - old version using `FOREACH`: [source, cypher] ---- MATCH (r:Root) @@ -487,41 +495,47 @@ FOREACH (x IN range(1, 10) | ) ---- -Doubly-nested subquery - new version using `DO`: +Now consider the new version using `CALL`: + [source, cypher] ---- MATCH (r:Root) UNWIND range(1, 10) AS x AS x -DO { +CALL { + WITH * CREATE (r)-[:PARENT]->(c:Child {id: x}) MERGE (r)-[:PUBLISHES]->(t:Topic {id: r.id + x}) UNWIND range(1, 10) AS y - DO { + CALL { + WITH * CREATE (c)-[p:PARENT]->(:Child {id: c.id * 10 + y}) SET p.id = c.id * 5 + y } } ---- -Conditional `DO` +Finally, below is an example of conditional `CALL`: + [source, cypher] ---- MATCH (r:Root) UNWIND range(1, 10) AS x -DO WHEN x % 2 = 1 THEN { - MERGE (c:Odd:Child {id: x}) - MERGE (r)-[:PARENT]->(c) - } - ELSE { - MERGE (c:Even:Child {id: x}) - MERGE (r)-[:PARENT]->(c) - } -END +CALL { + WHERE x % 2 = 1 + MERGE (c:Odd:Child {id: x}) + MERGE (r)-[:PARENT]->(c) + OTHERWISE + WITH * + MERGE (c:Even:Child {id: x}) + MERGE (r)-[:PARENT]->(c) +} ---- -==== Chained subqueries -Combining nested and chained subqueries +==== Composite statements + +Combining nested subqueries and composite statements: + [source, cypher] ---- MATCH (x)-[:IN]->(:Category {name: "A"}) @@ -539,11 +553,17 @@ WITH x.name AS name ORDER BY x.age RETURN x LIMIT 10 ---- + + +== Considerations + + === Interaction with existing features Apart from the suggested deprecation of the `FOREACH` clause, nested read-only, write-only and read-write subqueries do not interact directly with any existing features. 
-== Alternatives + +=== Alternatives Alternative syntax has been considered during the production of this document: @@ -553,9 +573,11 @@ Alternative syntax has been considered during the production of this document: ** `SUBQUERY` ** `QUERY` -== What others do -=== SQL +=== What others do + + +==== SQL The following types of subqueries are supported in SQL: @@ -598,7 +620,8 @@ FROM Scalar and list subqueries are addressed in the Scalar Subqueries and List Subqueries CIP. -=== SPARQL + +==== SPARQL https://www.w3.org/TR/2013/REC-sparql11-query-20130321/#subqueries[SPARQL] supports uncorrelated subqueries in the standard, exemplified by: @@ -620,13 +643,14 @@ Owing to the bottom-up nature of SPARQL query evaluation, the supported forms of Variables projected out of the subquery will be visible, or in scope, to the outer query. -== Benefits to this proposal +=== Benefits to this proposal * Increasing the expressivity of the language. * Allowing unified post-processing on results from multiple (sub)queries; this is exemplified by the https://github.com/neo4j/neo4j/issues/2725[request for post-UNION processing]. * Facilitating query readability, construction and maintainability. * Providing a feature familiar to users of SQL. -== Caveats to this proposal + +=== Caveats to this proposal At the current time, we are not aware of any caveats. 
From 5b6a333091969054f7ae8c08e1d827e7e93e3173 Mon Sep 17 00:00:00 2001 From: Stefan Plantikow Date: Mon, 7 May 2018 11:04:08 +0200 Subject: [PATCH 26/27] Added stand-alone nested calls and some clarifications/fix-ups --- ...ested-updating-and-chained-subqueries.adoc | 98 +++++++++++-------- 1 file changed, 59 insertions(+), 39 deletions(-) diff --git a/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc b/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc index 5f6e11139e..875db5ac8e 100644 --- a/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc +++ b/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc @@ -24,7 +24,7 @@ toc::[] === Motivation -Subqueries - i.e. queries within queries - are a powerful and expressive feature allowing for: +Nested subqueries - i.e. queries within queries - are a powerful and expressive feature allowing for: * Increased query expressivity * Better query construction and readability @@ -35,19 +35,22 @@ Subqueries - i.e. queries within queries - are a powerful and expressive feature === Background -This CIP has been created in tandem with CIP2017-06-18 for adding support for working with multiple graphs to Cypher and relies on the terminology for describing the high-level structure of queries introduced in CIP2017-06-18. -Therefore this proposal is based on the assumption that CIP2017-06-18 will be accepted. +This CIP has been created in tandem with `CIP2017-06-18` for adding support for working with multiple graphs to Cypher and relies on the terminology for describing the high-level structure of queries introduced in `CIP2017-06-18`. +Therefore this proposal is based on the assumption that `CIP2017-06-18` will be accepted. This CIP should also be viewed in light of CIPs for set operations, `EXISTS`, scalar subqueries, and list subqueries. 
+ === Design goals -This proposal follows the following design goals +This proposal follows the following design goals and principles: 1. Ensure that subqueries have the exact same capabilities in terms of consumed inputs, produces outputs, and potential side-effects as regular standalone queries. 2. Ensure that every subquery is a syntactically valid standalone query independent of which variables are provided by the calling context. +3. The calling context controls what kind of nested subquery (graph, table) is required. + == Proposal @@ -56,30 +59,28 @@ Subqueries are self-contained Cypher queries that are usually run within the sco This proposal suggests the introduction of new subquery constructs to Cypher. -* Nested subqueries -** Nested tabular subqueries +* Nested tabular subqueries *** Nested tabular subqueries of the form `CALL { }` *** Optional nested tabular subqueries of the form `OPTIONAL CALL { }` *** Mandatory nested tabular subqueries of the form `MANDATORY CALL { }` -** Nested graph subqueries -*** Create graph subqueries of the form `CREATE GRAPH { }, GRAPH { }, ...` +* Nested graph subqueries *** Read graph subqueries of the form `FROM { }` *** Update graph subqueries of the form `UPDATE { }` -** Grouped nested subqueries -* Syntax simplifications -** Conditional nested subqueries -** Composite statements +* Nested stand-alone subqueries of the form `RETURN|WITH CALL { }` +* Grouped nested subqueries +* Conditional nested subqueries +* Composite statements Both uncorrelated and correlated forms of nested subqueries are supported by this CIP. -This proposal additional suggests removing the `FOREACH` clause from the current language (it is rendered obsolete by the introduction of nested tabular subqueries). +This proposal additionally suggests removing the `FOREACH` clause from the current language (it is rendered obsolete by the introduction of conditional nested subqueries and composite statements).
=== Nested subqueries -Nested subqueries are always introduced with keywords that are followed by an inner query in curly braces. +Nested subqueries are always introduced with keywords that are followed by the actual subquery in curly braces. -_Definition_: A *nested subquery* is a composite statement that syntactically occurs as an argument to another clause that is enclosed in curly braces. +_Definition_: A *nested subquery* is a composite statement that occurs as an argument to another clause and that syntactically is enclosed in curly braces. Usage of nested subqueries must adhere to the following rules: @@ -87,19 +88,19 @@ Usage of nested subqueries must adhere to the following rules: 2. Nested subqueries that perform updates cannot be contained within nested subqueries in read-only contexts. 3. Nested subqueries are not allowed to contain schema commands -Note:: These restrictions capture current use of Cypher and may be reduced in the future. +Note:: These restrictions capture current use of Cypher and may be removed in the future. Nested subqueries may be correlated - i.e. the inner query may use variables from the outer query - or uncorrelated. -_Definition_: A *correlated nested subquery* is a *nested subquery* that has at least one leading clause that is a `WITH` clause that references a variable from a preceding clauses. +_Definition_: A *correlated nested subquery* is a *nested subquery* that has at least one leading clause that is a `WITH` clause that references a variable from the preceding clauses. _Definition_: An *uncorrelated nested subquery* is a *nested subquery* that has no leading clause that is a `WITH` clause that references a variable from the preceding clauses. A composite statement that is used as a nested subquery may have multiple points of entry. 
-The following definition captures this concept of entry points into a subquery by using the terminology introduced in CIP2017-06-18: +The following definition captures this concept of entry points into a subquery by using the terminology introduced in `CIP2017-06-18`: -_Definition_: The *leading clauses* of a composite statement are the leading clauses of the first single statement. -The leading clauses of a single statement are the leading clauses of its constituents. +_Definition_: The *leading clauses* of a composite statement are the leading clauses of the first simple statement of the composite statement. +The leading clauses of a simple statement are the leading clauses of its constituents. The leading clause of a simple clause chain is the first clause in the sequence of clauses unless that clause is a call to a nested subquery in which case the leading clauses of the simple clause chain will be taken to be the leading clauses of that nested subquery. The leading clauses of an operator clause chain are the leading clauses of all simple clause chains that are connected directly by the operator clause of the operator clause chain. @@ -108,7 +109,7 @@ The leading clauses of an operator clause chain are the leading clauses of all s A nested table subquery is evaluated for each incoming input record and may produce an arbitrary number of output records. -_Definition_: A *nested table subquery* is a composite statement that returns a table. +_Definition_: A *nested table subquery* is a nested subquery that returns a table. We extend `CALL` with a new syntactic form that allows a nested table subquery argument and may be used either in a stand-alone call or inside a simple clause chain. @@ -159,7 +160,7 @@ Semantics: 1. All rules for <> apply for correlated nested table subqueries unless otherwise noted in this list. -2. All variable bindings of the input record are made available to all leading clauses of the nested table subquery. +2. 
All variable bindings of the input record are made available to all leading `WITH` clauses of the nested table subquery. 3. The nested subquery may return variables already bound by preceding clauses if it can be shown via simple static analysis that these have just been passed through. It is not required that this analysis takes into account aliasing inside the nested subquery. @@ -168,9 +169,11 @@ It is not required that this analysis takes into account aliasing inside the nes [#optional-table-subqueries] ==== Optional nested table subqueries and procedure calls -A nested table subquery may be prefixed with the keyword `OPTIONAL`. +An optional nested table subquery is a nested table subquery that was prefixed with the keyword `OPTIONAL`. + +1. If calling the nested table subquery returns an empty result, this empty result is replaced with a table that consists of a single record that maps all variables that have been newly introduced by the nested table subquery to `NULL` and all variables that have been passed through by the nested table subquery to their value in the input record. -If calling the nested table subquery returns an empty result, this empty result is replaced with a table that consists of a single record that maps all variables that have been newly introduced by the the nested table subquery to `NULL` and all variables that have been passed through by the nested table subquery to their value in the input record. +2. An error is raised if an optional nested table subquery is an updating subquery. An implementation may choose to support the same semantics for calling procedures using syntax like `OPTIONAL CALL myProc(...) YIELD ...`. @@ -178,11 +181,11 @@ An implementation may choose to support the same semantics for calling procedure [#mandatory-table-subqueries] ==== Mandatory nested table subqueries and procedure calls -A nested table subquery may be prefixed with the keyword `MANDATORY`.
+A mandatory nested table subquery is a nested table subquery that was prefixed with the keyword `MANDATORY`. -If calling the nested table subquery returns an empty result, an error is raised. +1. An error is raised if calling the mandatory nested table subquery returns an empty result. -The same semantics are supported for calling procedures using syntax like `MANDATORY CALL myProc(...) YIELD ...`. +2. The same semantics are supported for calling procedures using syntax like `MANDATORY CALL myProc(...) YIELD ...`. === Nested graph subqueries and procedure calls @@ -191,8 +194,6 @@ _Definition_: A nested graph subquery is a nested subquery that returns a graph. Nested graph subqueries may be used in the following forms: - * `CREATE GRAPH name { AS , ...` will create one or more graphs in the catalog by making a copy of the graph returned by the subquery. - These graphs are logically created in parallel and thus cannot interact with each other. * `[OPTIONAL|MANDATORY] FROM { } | ` will change the working graph for further read operations without affecting the current variable bindings and the cardinality of records available to following clauses. * `[OPTIONAL|MANDATORY] UPDATE { } | ` will change the working graph for further updating operations without affecting the current variable bindings and the cardinality of records available to following clauses. @@ -208,8 +209,8 @@ Semantics: 3. A `MANDATORY` nested graph subquery raises an error if the provided graph argument is an empty graph. -4. An `OPTIONAL` nested graph subquery does not change the working graph if the provided graph argument is an empty graph. - +4. An `OPTIONAL` nested graph subquery changes the working graph if the provided graph argument is a non-empty graph; +it will change the working graph to itself (for reading or updating as indicated by `FROM` and `UPDATE`) otherwise. === Grouped nested subqueries @@ -237,20 +238,38 @@ Semantics: 3.
Introduced parameters and variables are only visible inside the nested subquery. +=== Nested stand-alone subqueries -=== Conditional nested subqueries +Nested stand-alone subqueries may be used to completely replace the current driving table with an execution result that is to be returned (either a graph, a table, or a void result). + +[source, cypher] +---- +RETURN CALL [PER ...] { ... } +RETURN CALL [PER ...] myProc(...) YIELD ... +---- + +Semantics: + +1. Grouped nested stand-alone subqueries must return a table. + +2. Nested stand-alone subqueries _replace_ all variable bindings in the current scope. + +This mirrors the capabilities of stand-alone calls which can be understood as a syntactic shorthand for a nested stand-alone query. -Correlated nested subqueries may start with a `WHERE ...` clause as a short hand for `WITH * WHERE ...`. + +=== Conditional nested subqueries This CIP proposes the introduction of the `OTHERWISE` operator clause: 1. ` OTHERWISE OTHERWISE ... ` either combines read-only simple clause chains or updating simple clause chains but raises an error when used to combine both read-only and updating simple clause chains. -2. ` OTHERWISE OTHERWISE ... ` raises an error if any two simple clause chains do not either both return a graph or a table with the same fields. +2. ` OTHERWISE OTHERWISE ... ` raises an error if any two simple clause chains do not either both return a graph or a table with the same fields or a void result. -3. If ` OTHERWISE OTHERWISE ... ` is used to combine read-only simple clause chains, it evaluates to the first `` that either returns a non-empty table or a non-empty graph and to `` otherwise. +3. If ` OTHERWISE OTHERWISE ... ` is used to combine read-only simple clause chains, it evaluates to the first `` that returns a non-empty result and to `` otherwise. -4. If ` OTHERWISE OTHERWISE ... 
` is used to combine updating simple clause chains, it evaluates to the first `` that performs a side-effects and to `` otherwise. +4. If ` OTHERWISE OTHERWISE ... ` is used to combine updating simple clause chains, it evaluates to the first `` that performs a side-effect and to `` otherwise. + +Furthermore, this CIP proposes that correlated nested subqueries may start with a `WHERE ...` clause as a shorthand for `WITH * WHERE ...`. === Composite statements @@ -258,13 +277,14 @@ This CIP proposes the introduction of the `OTHERWISE` operator clause: Simple statements are either simple clause chains or operator clause chains (This is defined in `CIP2017-06-18`). Composite statements allow sequencing simple statements using the `THEN` clause. -The `THEN` clause may be omitted if the preceding clause is a `RETURN` or `RETURN GRAPH` clause. +The `THEN` clause _may_ be omitted if the preceding clause is a `RETURN` or `RETURN GRAPH` clause. This is called composition using vertical juxtaposition. === Discarding variables in scope -Finally, this CIP proposes new shorthand syntax for discarding all variables in scope without discarding the cardinality of input records using `WITH|RETURN|YIELD NOTHING`. +Finally, this CIP proposes new shorthand syntax for discarding all variables in scope without discarding the cardinality of input records using `WITH|RETURN|YIELD NONE`. + == Grammar @@ -298,7 +318,7 @@ The following grammar shows the main syntax of all proposed changes: | `{` WHERE < predicate > < composite statement > `}` ; - ::= < identifier > `(` < args > `)` [ YIELD * | < bindings > ] ; + ::= < identifier > `(` < args > `)` [ YIELD * | < bindings > | NONE ] ; ::= < expr > [ { `,` < expr> } ...
] ; From 077fb18c8fe26e251d1089e8f369c924b5cc1a1c Mon Sep 17 00:00:00 2001 From: Stefan Plantikow Date: Mon, 7 May 2018 11:07:44 +0200 Subject: [PATCH 27/27] Grammar fix --- ...6-06-22-nested-updating-and-chained-subqueries.adoc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc b/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc index 875db5ac8e..93bc448327 100644 --- a/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc +++ b/cip/1.accepted/CIP2016-06-22-nested-updating-and-chained-subqueries.adoc @@ -293,18 +293,17 @@ The following grammar shows the main syntax of all proposed changes: [source, cypher] ---- - ::= < simple statement > [ { `THEN` < simple statement > } ; + ::= + < simple statement > [ { `THEN` < simple statement > } ... ] ; ::= < query-mode > CALL < query-group > < subquery > | < query-mode > CALL < query-group > < invocation > ; - ::= < query-mode > FROM [ PER * | < keys > ] < subquery > ; + ::= < query-mode > FROM < query-group > < subquery > ; ::= < query-mode > UPDATE < query-group > < subquery > ; - ::= CREATE GRAPH < graph > [ { `,` GRAPH < graph > } ... ] ; - ::= < identifier > < subquery > | < invocation > AS < identifier > | < identifier > @@ -318,7 +317,8 @@ The following grammar shows the main syntax of all proposed changes: | `{` WHERE < predicate > < composite statement > `}` ; - ::= < identifier > `(` < args > `)` [ YIELD * | < bindings > | NONE ] ; + ::= + < identifier > `(` < args > `)` [ YIELD * | < bindings > | NONE ] ; ::= < expr > [ { `,` < expr> } ... ] ;