From d8af2bb0859125a54b409954a430bba1b3e0e5d3 Mon Sep 17 00:00:00 2001 From: Alexey Orlenko Date: Wed, 13 Dec 2023 13:56:56 +0100 Subject: [PATCH] fix: chunk and merge json objects on postgres (#4555) Functions in PostgreSQL can only accept up to 100 arguments, which means that we can't build an object with more than 50 fields using `JSON_BUILD_OBJECT`. To work around that, we chunk the fields into subsets of 50 fields or less, build one or more JSONB objects using one or more `JSONB_BUILD_OBJECT` invocations, and merge them together using the `||` operator (which is not possible with plain JSON). Another alternative that was considered and prototyped first was using `ROW_TO_JSON` but it turned out to not be a suitable replacement for several reasons, the final deal breaker [being the limit of the length of field names](https://github.com/hasura/graphql-engine/issues/4004#issuecomment-593831051) (63 characters). Other problems included the lack of support for `ROW_TO_JSON` on MySQL, which would have required us to have conditional logic in the query builder on the `sql-query-connector` level, which would introduce logic dependent on connector capabilities at an inappropriate abstraction layer, and difficulties in building the query compatible with `ROW_TO_JSON` without overfetching data because we would need to select additional fields (e.g. for filtering and order by) to be able to forward them to a query above without an easy way to exclude them from being added to the JSON object. The workaround with JSONB doesn't suffer from these issues, and is completely isolated on the quaint level without leaking to the query engine. Fixes: https://github.com/prisma/prisma/issues/22298 Closes: https://github.com/prisma/prisma-engines/pull/4550 --- Cargo.lock | 1 + quaint/Cargo.toml | 1 + quaint/src/ast/function.rs | 2 + quaint/src/visitor.rs | 27 +-- quaint/src/visitor/mssql.rs | 12 +- quaint/src/visitor/mysql.rs | 10 + quaint/src/visitor/postgres.rs | 89 +++++++ quaint/src/visitor/sqlite.rs | 10 + .../tests/new/regressions/mod.rs | 1 + .../tests/new/regressions/prisma_22298.rs | 218 ++++++++++++++++++ 10 files changed, 353 insertions(+), 18 deletions(-) create mode 100644 query-engine/connector-test-kit-rs/query-engine-tests/tests/new/regressions/prisma_22298.rs diff --git a/Cargo.lock b/Cargo.lock index 51e20dc11a42..b5e3181de783 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3568,6 +3568,7 @@ dependencies = [ "getrandom 0.2.11", "hex", "indoc 0.3.6", + "itertools", "lru-cache", "metrics 0.18.1", "mobc", diff --git a/quaint/Cargo.toml b/quaint/Cargo.toml index 254c27446c9b..0668eba34a2d 100644 --- a/quaint/Cargo.toml +++ b/quaint/Cargo.toml @@ -76,6 +76,7 @@ metrics = "0.18" futures = "0.3" url = "2.1" hex = "0.4" +itertools = "0.10" either = { version = "1.6" } base64 = { version = "0.12.3" } diff --git a/quaint/src/ast/function.rs b/quaint/src/ast/function.rs index 3bcc24c4b072..659cf03bfac3 100644 --- a/quaint/src/ast/function.rs +++ b/quaint/src/ast/function.rs @@ -102,7 +102,9 @@ pub(crate) enum FunctionType<'a> { JsonExtractFirstArrayElem(JsonExtractFirstArrayElem<'a>), #[cfg(any(feature = "postgresql", feature = "mysql"))] JsonUnquote(JsonUnquote<'a>), + #[cfg(feature = "postgresql")] JsonArrayAgg(JsonArrayAgg<'a>), + #[cfg(feature = "postgresql")] JsonBuildObject(JsonBuildObject<'a>), #[cfg(any(feature = "postgresql", feature = "mysql"))] TextSearch(TextSearch<'a>), diff --git a/quaint/src/visitor.rs b/quaint/src/visitor.rs index 57159bd1e9c1..58baa09a791f 100644 --- a/quaint/src/visitor.rs +++ b/quaint/src/visitor.rs @@ -139,6 +139,12 @@ pub trait Visitor<'a> { #[cfg(any(feature = "postgresql", feature = "mysql"))] fn visit_json_unquote(&mut self, json_unquote: JsonUnquote<'a>) -> Result; + #[cfg(feature = "postgresql")] + fn visit_json_array_agg(&mut self, array_agg: JsonArrayAgg<'a>) -> Result; + + #[cfg(feature = "postgresql")] + fn visit_json_build_object(&mut self, build_obj: JsonBuildObject<'a>) -> Result; + #[cfg(any(feature = "postgresql", feature = "mysql"))] fn visit_text_search(&mut self, text_search: TextSearch<'a>) -> Result; @@ -1132,26 +1138,13 @@ pub trait Visitor<'a> { FunctionType::Concat(concat) => { self.visit_concat(concat)?; } + #[cfg(feature = "postgresql")] FunctionType::JsonArrayAgg(array_agg) => { - self.write("JSON_AGG")?; - self.surround_with("(", ")", |s| s.visit_expression(*array_agg.expr))?; + self.visit_json_array_agg(array_agg)?; } + #[cfg(feature = "postgresql")] FunctionType::JsonBuildObject(build_obj) => { - let len = build_obj.exprs.len(); - - self.write("JSON_BUILD_OBJECT")?; - self.surround_with("(", ")", |s| { - for (i, (name, expr)) in build_obj.exprs.into_iter().enumerate() { - s.visit_raw_value(Value::text(name))?; - s.write(", ")?; - s.visit_expression(expr)?; - if i < (len - 1) { - s.write(", ")?; - } - } - - Ok(()) - })?; + self.visit_json_build_object(build_obj)?; } }; diff --git a/quaint/src/visitor/mssql.rs b/quaint/src/visitor/mssql.rs index bf1550b96c31..6f259218ca77 100644 --- a/quaint/src/visitor/mssql.rs +++ b/quaint/src/visitor/mssql.rs @@ -1,6 +1,6 @@ use super::Visitor; #[cfg(any(feature = "postgresql", feature = "mysql"))] -use crate::prelude::{JsonExtract, JsonType, JsonUnquote}; +use crate::prelude::{JsonArrayAgg, JsonBuildObject, JsonExtract, JsonType, JsonUnquote}; use crate::{ ast::{ Column, Comparable, Expression, ExpressionKind, Insert, IntoRaw, Join, JoinData, Joinable, Merge, OnConflict, @@ -656,6 +656,16 @@ impl<'a> Visitor<'a> for Mssql<'a> { unimplemented!("JSON filtering is not yet supported on MSSQL") } + #[cfg(feature = "postgresql")] + fn visit_json_array_agg(&mut self, _array_agg: JsonArrayAgg<'a>) -> visitor::Result { + unimplemented!("JSON_AGG is not yet supported on MSSQL") + } + + #[cfg(feature = "postgresql")] + fn visit_json_build_object(&mut self, _build_obj: JsonBuildObject<'a>) -> visitor::Result { + unimplemented!("JSON_BUILD_OBJECT is not yet supported on MSSQL") + } + #[cfg(feature = "postgresql")] fn visit_text_search(&mut self, _text_search: crate::prelude::TextSearch<'a>) -> visitor::Result { unimplemented!("Full-text search is not yet supported on MSSQL") diff --git a/quaint/src/visitor/mysql.rs b/quaint/src/visitor/mysql.rs index 26d0f0d5fd65..aa57db799a34 100644 --- a/quaint/src/visitor/mysql.rs +++ b/quaint/src/visitor/mysql.rs @@ -562,6 +562,16 @@ impl<'a> Visitor<'a> for Mysql<'a> { Ok(()) } + #[cfg(feature = "postgresql")] + fn visit_json_array_agg(&mut self, _array_agg: JsonArrayAgg<'a>) -> visitor::Result { + unimplemented!("JSON_ARRAYAGG is not yet supported on MySQL") + } + + #[cfg(feature = "postgresql")] + fn visit_json_build_object(&mut self, _build_obj: JsonBuildObject<'a>) -> visitor::Result { + unimplemented!("JSON_OBJECT is not yet supported on MySQL") + } + fn visit_ordering(&mut self, ordering: Ordering<'a>) -> visitor::Result { let len = ordering.0.len(); diff --git a/quaint/src/visitor/postgres.rs b/quaint/src/visitor/postgres.rs index 749b752709d4..da02c26c3353 100644 --- a/quaint/src/visitor/postgres.rs +++ b/quaint/src/visitor/postgres.rs @@ -2,6 +2,7 @@ use crate::{ ast::*, visitor::{self, Visitor}, }; +use itertools::Itertools; use std::{ fmt::{self, Write}, ops::Deref, @@ -499,6 +500,57 @@ impl<'a> Visitor<'a> for Postgres<'a> { } } + #[cfg(feature = "postgresql")] + fn visit_json_array_agg(&mut self, array_agg: JsonArrayAgg<'a>) -> visitor::Result { + self.write("JSONB_AGG")?; + self.surround_with("(", ")", |s| s.visit_expression(*array_agg.expr))?; + + Ok(()) + } + + #[cfg(feature = "postgresql")] + fn visit_json_build_object(&mut self, build_obj: JsonBuildObject<'a>) -> visitor::Result { + // Functions in PostgreSQL can only accept up to 100 arguments, which means that we can't + // build an object with more than 50 fields using `JSON_BUILD_OBJECT`. To work around + // that, we chunk the fields into subsets of 50 fields or less, build one or more JSONB + // objects using one or more `JSONB_BUILD_OBJECT` invocations, and merge them together + // using the `||` operator (which is not possible with plain JSON). + // + // See . + // + // Another alternative that was considered for the specific use case of loading relations + // in Query Engine was using `ROW_TO_JSON` but it turned out to not be a suitable + // replacement for several reasons, the main one being the limit of the length of field + // names (63 characters). + const MAX_FIELDS: usize = 50; + let num_chunks = build_obj.exprs.len().div_ceil(MAX_FIELDS); + + for (i, chunk) in build_obj.exprs.into_iter().chunks(MAX_FIELDS).into_iter().enumerate() { + let mut chunk = chunk.peekable(); + + self.write("JSONB_BUILD_OBJECT")?; + + self.surround_with("(", ")", |s| { + while let Some((name, expr)) = chunk.next() { + s.visit_raw_value(Value::text(name))?; + s.write(", ")?; + s.visit_expression(expr)?; + if chunk.peek().is_some() { + s.write(", ")?; + } + } + + Ok(()) + })?; + + if i < num_chunks - 1 { + self.write(" || ")?; + } + } + + Ok(()) + } + fn visit_text_search(&mut self, text_search: crate::prelude::TextSearch<'a>) -> visitor::Result { let len = text_search.exprs.len(); self.surround_with("to_tsvector(concat_ws(' ', ", "))", |s| { @@ -1209,4 +1261,41 @@ mod tests { assert_eq!("SELECT MIN(\"enum\")::text, MAX(\"enum\")::text FROM \"User\"", sql); } + + mod test_json_build_object { + use super::*; + + #[test] + fn simple() { + let build_json = build_json_object(3); + let query = Select::default().value(build_json); + let (sql, _) = Postgres::build(query).unwrap(); + + assert_eq!("SELECT JSONB_BUILD_OBJECT('f1', $1, 'f2', $2, 'f3', $3)", sql); + } + + #[test] + fn chunked() { + let build_json = build_json_object(110); + let query = Select::default().value(build_json); + let (sql, _) = Postgres::build(query).unwrap(); + + assert_eq!( + concat!( + "SELECT JSONB_BUILD_OBJECT('f1', $1, 'f2', $2, 'f3', $3, 'f4', $4, 'f5', $5, 'f6', $6, 'f7', $7, 'f8', $8, 'f9', $9, 'f10', $10, 'f11', $11, 'f12', $12, 'f13', $13, 'f14', $14, 'f15', $15, 'f16', $16, 'f17', $17, 'f18', $18, 'f19', $19, 'f20', $20, 'f21', $21, 'f22', $22, 'f23', $23, 'f24', $24, 'f25', $25, 'f26', $26, 'f27', $27, 'f28', $28, 'f29', $29, 'f30', $30, 'f31', $31, 'f32', $32, 'f33', $33, 'f34', $34, 'f35', $35, 'f36', $36, 'f37', $37, 'f38', $38, 'f39', $39, 'f40', $40, 'f41', $41, 'f42', $42, 'f43', $43, 'f44', $44, 'f45', $45, 'f46', $46, 'f47', $47, 'f48', $48, 'f49', $49, 'f50', $50)", + " || JSONB_BUILD_OBJECT('f51', $51, 'f52', $52, 'f53', $53, 'f54', $54, 'f55', $55, 'f56', $56, 'f57', $57, 'f58', $58, 'f59', $59, 'f60', $60, 'f61', $61, 'f62', $62, 'f63', $63, 'f64', $64, 'f65', $65, 'f66', $66, 'f67', $67, 'f68', $68, 'f69', $69, 'f70', $70, 'f71', $71, 'f72', $72, 'f73', $73, 'f74', $74, 'f75', $75, 'f76', $76, 'f77', $77, 'f78', $78, 'f79', $79, 'f80', $80, 'f81', $81, 'f82', $82, 'f83', $83, 'f84', $84, 'f85', $85, 'f86', $86, 'f87', $87, 'f88', $88, 'f89', $89, 'f90', $90, 'f91', $91, 'f92', $92, 'f93', $93, 'f94', $94, 'f95', $95, 'f96', $96, 'f97', $97, 'f98', $98, 'f99', $99, 'f100', $100)", + " || JSONB_BUILD_OBJECT('f101', $101, 'f102', $102, 'f103', $103, 'f104', $104, 'f105', $105, 'f106', $106, 'f107', $107, 'f108', $108, 'f109', $109, 'f110', $110)" + ), + sql + ); + } + + fn build_json_object(num_fields: u32) -> JsonBuildObject<'static> { + let fields = (1..=num_fields) + .map(|i| (format!("f{i}").into(), Expression::from(i as i64))) + .collect(); + + JsonBuildObject { exprs: fields } + } + } } diff --git a/quaint/src/visitor/sqlite.rs b/quaint/src/visitor/sqlite.rs index 9c15ef651694..5e30bc54c78e 100644 --- a/quaint/src/visitor/sqlite.rs +++ b/quaint/src/visitor/sqlite.rs @@ -329,6 +329,16 @@ impl<'a> Visitor<'a> for Sqlite<'a> { unimplemented!("JSON filtering is not yet supported on SQLite") } + #[cfg(feature = "postgresql")] + fn visit_json_array_agg(&mut self, _array_agg: JsonArrayAgg<'a>) -> visitor::Result { + unimplemented!("JSON_AGG is not yet supported on SQLite") + } + + #[cfg(feature = "postgresql")] + fn visit_json_build_object(&mut self, _build_obj: JsonBuildObject<'a>) -> visitor::Result { + unimplemented!("JSON_BUILD_OBJECT is not yet supported on SQLite") + } + fn visit_ordering(&mut self, ordering: Ordering<'a>) -> visitor::Result { let len = ordering.0.len(); diff --git a/query-engine/connector-test-kit-rs/query-engine-tests/tests/new/regressions/mod.rs b/query-engine/connector-test-kit-rs/query-engine-tests/tests/new/regressions/mod.rs index 2cd2938f916a..deaaa7e84313 100644 --- a/query-engine/connector-test-kit-rs/query-engine-tests/tests/new/regressions/mod.rs +++ b/query-engine/connector-test-kit-rs/query-engine-tests/tests/new/regressions/mod.rs @@ -22,6 +22,7 @@ mod prisma_20799; mod prisma_21182; mod prisma_21369; mod prisma_21901; +mod prisma_22298; mod prisma_5952; mod prisma_6173; mod prisma_7010; diff --git a/query-engine/connector-test-kit-rs/query-engine-tests/tests/new/regressions/prisma_22298.rs b/query-engine/connector-test-kit-rs/query-engine-tests/tests/new/regressions/prisma_22298.rs new file mode 100644 index 000000000000..4e7c589d14f5 --- /dev/null +++ b/query-engine/connector-test-kit-rs/query-engine-tests/tests/new/regressions/prisma_22298.rs @@ -0,0 +1,218 @@ +use query_engine_tests::*; + +#[test_suite(schema(schema))] +mod many_fields_in_related_table { + use indoc::indoc; + + fn schema() -> String { + indoc! {r#" + model A { + #id(id, Int, @id) + field1 Int + field2 Int + field3 Int + field4 Int + field5 Int + field6 Int + field7 Int + field8 Int + field9 Int + field10 Int + field11 Int + field12 Int + field13 Int + field14 Int + field15 Int + field16 Int + field17 Int + field18 Int + field19 Int + field20 Int + field21 Int + field22 Int + field23 Int + field24 Int + field25 Int + field26 Int + field27 Int + field28 Int + field29 Int + field30 Int + field31 Int + field32 Int + field33 Int + field34 Int + field35 Int + field36 Int + field37 Int + field38 Int + field39 Int + field40 Int + field41 Int + field42 Int + field43 Int + field44 Int + field45 Int + field46 Int + field47 Int + field48 Int + field49 Int + field50 Int + field51 Int + b_id Int + b B @relation(fields: [b_id], references: [id]) + c C[] + } + + model B { + #id(id, Int, @id) + a A[] + } + + model C { + #id(id, Int, @id) + a_id Int + a A @relation(fields: [a_id], references: [id]) + } + "#} + .to_owned() + } + + #[connector_test] + async fn query_53_fields_through_relation(runner: Runner) -> TestResult<()> { + insta::assert_snapshot!( + run_query!(runner, r#" + mutation { + createOneB( + data: { + id: 1, + a: { + create: { + id: 1, + field1: 0, + field2: 0, + field3: 0, + field4: 0, + field5: 0, + field6: 0, + field7: 0, + field8: 0, + field9: 0, + field10: 0, + field11: 0, + field12: 0, + field13: 0, + field14: 0, + field15: 0, + field16: 0, + field17: 0, + field18: 0, + field19: 0, + field20: 0, + field21: 0, + field22: 0, + field23: 0, + field24: 0, + field25: 0, + field26: 0, + field27: 0, + field28: 0, + field29: 0, + field30: 0, + field31: 0, + field32: 0, + field33: 0, + field34: 0, + field35: 0, + field36: 0, + field37: 0, + field38: 0, + field39: 0, + field40: 0, + field41: 0, + field42: 0, + field43: 0, + field44: 0, + field45: 0, + field46: 0, + field47: 0, + field48: 0, + field49: 0, + field50: 0, + field51: 0, + c: { + create: { + id: 1 + } + } + } + } + } + ) { + id + a { + id + field1 + field2 + field3 + field4 + field5 + field6 + field7 + field8 + field9 + field10 + field11 + field12 + field13 + field14 + field15 + field16 + field17 + field18 + field19 + field20 + field21 + field22 + field23 + field24 + field25 + field26 + field27 + field28 + field29 + field30 + field31 + field32 + field33 + field34 + field35 + field36 + field37 + field38 + field39 + field40 + field41 + field42 + field43 + field44 + field45 + field46 + field47 + field48 + field49 + field50 + field51 + c { + id + } + } + } + } + "#), + @r###"{"data":{"createOneB":{"id":1,"a":[{"id":1,"field1":0,"field2":0,"field3":0,"field4":0,"field5":0,"field6":0,"field7":0,"field8":0,"field9":0,"field10":0,"field11":0,"field12":0,"field13":0,"field14":0,"field15":0,"field16":0,"field17":0,"field18":0,"field19":0,"field20":0,"field21":0,"field22":0,"field23":0,"field24":0,"field25":0,"field26":0,"field27":0,"field28":0,"field29":0,"field30":0,"field31":0,"field32":0,"field33":0,"field34":0,"field35":0,"field36":0,"field37":0,"field38":0,"field39":0,"field40":0,"field41":0,"field42":0,"field43":0,"field44":0,"field45":0,"field46":0,"field47":0,"field48":0,"field49":0,"field50":0,"field51":0,"c":[{"id":1}]}]}}}"### + ); + + Ok(()) + } +}