From 013bd45959278608087ea40c6fa37fbea747fa69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Thu, 21 Mar 2024 12:05:35 +0100 Subject: [PATCH 01/21] columnar support for arrow tables --- package.json | 1 + src/options.js | 29 ++++++- test/plots/arrow.ts | 52 +++++++++++++ test/plots/index.ts | 1 + yarn.lock | 182 +++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 260 insertions(+), 5 deletions(-) create mode 100644 test/plots/arrow.ts diff --git a/package.json b/package.json index e1da72155e..918dd27058 100644 --- a/package.json +++ b/package.json @@ -56,6 +56,7 @@ "@types/node": "^20.5.0", "@typescript-eslint/eslint-plugin": "^7.2.0", "@typescript-eslint/parser": "^7.2.0", + "apache-arrow": "^15.0.2", "c8": "^9.1.0", "canvas": "^2.0.0", "d3-geo-projection": "^4.0.0", diff --git a/src/options.js b/src/options.js index d7abbadf0e..9fc174b0f1 100644 --- a/src/options.js +++ b/src/options.js @@ -14,7 +14,7 @@ export const reindex = Symbol("reindex"); export function valueof(data, value, type) { const valueType = typeof value; return valueType === "string" - ? maybeTypedMap(data, field(value), type) + ? columnar(data, value, type) : valueType === "function" ? maybeTypedMap(data, value, type) : valueType === "number" || value instanceof Date || valueType === "boolean" @@ -133,6 +133,7 @@ export function keyword(input, name, allowed) { // Promotes the specified data to an array as needed. export function arrayify(values) { if (values == null || values instanceof Array || values instanceof TypedArray) return values; + if (isArrowTable(values)) return arrowTableProxy(values); switch (values.type) { case "FeatureCollection": return values.features; @@ -575,3 +576,29 @@ export function maybeClip(clip) { else if (clip != null) clip = keyword(clip, "clip", ["frame", "sphere"]); return clip; } + +// Duck typing Apache Arrow tables +function isArrowTable(data) { + return typeof data?.getChild === "function" && typeof data.numRows === "number" && typeof data.slice === "function"; +} + +// Extract columnar data +function columnar(data, name, type) { + return isArrowTable(data) ? maybeTypedArrayify(data.getChild(name), type) : maybeTypedMap(data, field(name), type); +} + +// “Arrayify” but for Arrow tables. We try to avoid materializing the values, +// but the Proxy might be used by the group reducer to construct groupData. +function arrowTableProxy(data) { + return new Proxy(data, { + get(target, prop) { + return prop === "length" + ? target.numRows + : prop === "constructor" // for take/map + ? Array + : typeof prop === "string" && !isNaN(prop) + ? {...target.get(prop)} + : target[prop]; // pass all other properties + } + }); +} diff --git a/test/plots/arrow.ts b/test/plots/arrow.ts new file mode 100644 index 0000000000..5699e765fc --- /dev/null +++ b/test/plots/arrow.ts @@ -0,0 +1,52 @@ +import * as Plot from "@observablehq/plot"; +import * as d3 from "d3"; +import * as Arrow from "apache-arrow"; + +/** + * An arrow table dataset supports direct (getChild) accessors. + */ +export async function arrowTest() { + const data = Arrow.tableFromArrays({ + id: [1, 2, 3], + name: ["Alice", "Bob", "Charlie"], + age: [35, 25, 45] + }); + return Plot.barY(data, {x: "name", y: "age"}).plot(); +} + +/** + * An arrow table dataset supports function accessors. + */ +export async function arrowTestAccessor() { + const data = Arrow.tableFromArrays({ + id: [1, 2, 3], + name: ["Alice", "Bob", "Charlie"], + age: [35, 25, 45] + }); + + return Plot.barY(data, {x: "name", y: "age", fill: (d) => d.name}).plot(); +} + +/** + * An arrow table dataset supports binning. + */ +export async function arrowTestBin() { + const seed = d3.randomLcg(42); + const vector = Uint8Array.from({length: 1e5}, d3.randomExponential.source(seed)(1)); + const category = Array.from({length: 1e5}, d3.randomInt.source(seed)(4)).map((i) => `a${i}`); + const data = Arrow.tableFromArrays({category, vector}); + return Plot.rectY(data, Plot.binX({y: "count"}, {x: "vector", fill: "category", thresholds: 10})).plot({ + marginLeft: 60 + }); +} + +/** + * An arrow table dataset supports grouping. + */ +export async function arrowTestGroup() { + const seed = d3.randomLcg(42); + const vector = Uint8Array.from({length: 1e5}, d3.randomExponential.source(seed)(1)); + const category = Array.from({length: 1e5}, d3.randomInt.source(seed)(4)).map((i) => `a${i}`); + const data = Arrow.tableFromArrays({category, vector}); + return Plot.barY(data, Plot.groupX({y: "count"}, {x: "vector", fill: "category"})).plot({marginLeft: 60}); +} diff --git a/test/plots/index.ts b/test/plots/index.ts index f1b253323e..e224ca5c1b 100644 --- a/test/plots/index.ts +++ b/test/plots/index.ts @@ -11,6 +11,7 @@ export * from "./aapl-volume.js"; export * from "./anscombe-quartet.js"; export * from "./arc.js"; export * from "./armadillo.js"; +export * from "./arrow.js"; export * from "./aspectRatio.js"; export * from "./athletes-bins-colors.js"; export * from "./athletes-birthdays.js"; diff --git a/yarn.lock b/yarn.lock index 83ed001bb6..92b4defaa4 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2,6 +2,14 @@ # yarn lockfile v1 +"@75lb/deep-merge@^1.1.1": + version "1.1.1" + resolved "https://registry.yarnpkg.com/@75lb/deep-merge/-/deep-merge-1.1.1.tgz#3b06155b90d34f5f8cc2107d796f1853ba02fd6d" + integrity sha512-xvgv6pkMGBA6GwdyJbNAnDmfAIR/DfWhrj9jgWh3TY7gRm3KO46x/GPjRg6wJ0nOepwqrNxFfojebh0Df4h4Tw== + dependencies: + lodash.assignwith "^4.2.0" + typical "^7.1.1" + "@algolia/autocomplete-core@1.9.3": version "1.9.3" resolved "https://registry.yarnpkg.com/@algolia/autocomplete-core/-/autocomplete-core-1.9.3.tgz#1d56482a768c33aae0868c8533049e02e8961be7" @@ -617,6 +625,13 @@ dependencies: shiki "1.6.2" +"@swc/helpers@^0.5.2": + version "0.5.7" + resolved "https://registry.yarnpkg.com/@swc/helpers/-/helpers-0.5.7.tgz#36c05f61b412abcff3616ecc8634623bcc7c9618" + integrity sha512-BVvNZhx362+l2tSwSuyEUV4h7+jk9raNdoTSdLfwTshXJSaGmYKluGRJznziCI3KX02Z19DdsQrdfrpXAU3Hfg== + dependencies: + tslib "^2.4.0" + "@ts-morph/common@~0.23.0": version "0.23.0" resolved "https://registry.yarnpkg.com/@ts-morph/common/-/common-0.23.0.tgz#bd4ddbd3f484f29476c8bd985491592ae5fc147e" @@ -627,6 +642,16 @@ mkdirp "^3.0.1" path-browserify "^1.0.1" +"@types/command-line-args@^5.2.1": + version "5.2.3" + resolved "https://registry.yarnpkg.com/@types/command-line-args/-/command-line-args-5.2.3.tgz#553ce2fd5acf160b448d307649b38ffc60d39639" + integrity sha512-uv0aG6R0Y8WHZLTamZwtfsDLVRnOa+n+n5rEvFWL5Na5gZ8V2Teab/duDPFzIIIhs9qizDpcavCusCLJZu62Kw== + +"@types/command-line-usage@^5.0.2": + version "5.0.4" + resolved "https://registry.yarnpkg.com/@types/command-line-usage/-/command-line-usage-5.0.4.tgz#374e4c62d78fbc5a670a0f36da10235af879a0d5" + integrity sha512-BwR5KP3Es/CSht0xqBcUXS3qCAUVXwpRKsV2+arxeb65atasuXG9LykC9Ab10Cw3s2raH92ZqOeILaQbsB2ACg== + "@types/d3-array@*": version "3.2.1" resolved "https://registry.yarnpkg.com/@types/d3-array/-/d3-array-3.2.1.tgz#1f6658e3d2006c4fceac53fde464166859f8b8c5" @@ -882,6 +907,13 @@ dependencies: undici-types "~5.26.4" +"@types/node@^20.6.0": + version "20.11.30" + resolved "https://registry.yarnpkg.com/@types/node/-/node-20.11.30.tgz#9c33467fc23167a347e73834f788f4b9f399d66f" + integrity sha512-dHM6ZxwlmuZaRmUPfv1p+KrdD1Dci04FbdEm/9wEMouFqxYoFl5aMkt0VMAUtYRQDyYvD41WJLukhq/ha3YuTw== + dependencies: + undici-types "~5.26.4" + "@types/resolve@1.20.2": version "1.20.2" resolved "https://registry.yarnpkg.com/@types/resolve/-/resolve-1.20.2.tgz#97d26e00cd4a0423b4af620abecf3e6f442b7975" @@ -1218,6 +1250,21 @@ anymatch@~3.1.2: normalize-path "^3.0.0" picomatch "^2.0.4" +apache-arrow@^15.0.2: + version "15.0.2" + resolved "https://registry.yarnpkg.com/apache-arrow/-/apache-arrow-15.0.2.tgz#d87c6447d64d6fab34aa70119362680b6617ce63" + integrity sha512-RvwlFxLRpO405PLGffx4N2PYLiF7FD86Q1hHl6J2XCWiq+tTCzpb9ngFw0apFDcXZBMpCzMuwAvA7hjyL1/73A== + dependencies: + "@swc/helpers" "^0.5.2" + "@types/command-line-args" "^5.2.1" + "@types/command-line-usage" "^5.0.2" + "@types/node" "^20.6.0" + command-line-args "^5.2.1" + command-line-usage "^7.0.1" + flatbuffers "^23.5.26" + json-bignum "^0.0.3" + tslib "^2.6.2" + "aproba@^1.0.3 || ^2.0.0": version "2.0.0" resolved "https://registry.yarnpkg.com/aproba/-/aproba-2.0.0.tgz#52520b8ae5b569215b354efc0caa3fe1e45a8adc" @@ -1236,6 +1283,16 @@ argparse@^2.0.1: resolved "https://registry.yarnpkg.com/argparse/-/argparse-2.0.1.tgz#246f50f3ca78a3240f6c997e8a9bd1eac49e4b38" integrity sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q== +array-back@^3.0.1, array-back@^3.1.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/array-back/-/array-back-3.1.0.tgz#b8859d7a508871c9a7b2cf42f99428f65e96bfb0" + integrity sha512-TkuxA4UCOvxuDK6NZYXCalszEzj+TLszyASooky+i742l9TqsOdYCMJJupxRic61hwquNtppB3hgcuq9SVSH1Q== + +array-back@^6.2.2: + version "6.2.2" + resolved "https://registry.yarnpkg.com/array-back/-/array-back-6.2.2.tgz#f567d99e9af88a6d3d2f9dfcc21db6f9ba9fd157" + integrity sha512-gUAZ7HPyb4SJczXAMUXMGAvI976JoK3qEx9v1FTmeYuJj0IBiaKttG1ydtGKdkfqWkIkouke7nG8ufGy77+Cvw== + array-union@^2.1.0: version "2.1.0" resolved "https://registry.yarnpkg.com/array-union/-/array-union-2.1.0.tgz#b798420adbeb1de828d84acd8a2e23d3efe85e8d" @@ -1334,7 +1391,14 @@ canvas@^2.0.0: nan "^2.17.0" simple-get "^3.0.3" -chalk@^4.0.0, chalk@^4.1.0: +chalk-template@^0.4.0: + version "0.4.0" + resolved "https://registry.yarnpkg.com/chalk-template/-/chalk-template-0.4.0.tgz#692c034d0ed62436b9062c1707fadcd0f753204b" + integrity sha512-/ghrgmhfY8RaSdeo43hNXxpoHAtxdbskUHjPpfqUWGttFgycUhYPGx3YZBCnUCvOa7Doivn1IZec3DEGFoMgLg== + dependencies: + chalk "^4.1.2" + +chalk@^4.0.0, chalk@^4.1.0, chalk@^4.1.2: version "4.1.2" resolved "https://registry.yarnpkg.com/chalk/-/chalk-4.1.2.tgz#aac4e2b7734a740867aeb16bf02aad556a1e7a01" integrity sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA== @@ -1409,6 +1473,26 @@ combined-stream@^1.0.8: dependencies: delayed-stream "~1.0.0" +command-line-args@^5.2.1: + version "5.2.1" + resolved "https://registry.yarnpkg.com/command-line-args/-/command-line-args-5.2.1.tgz#c44c32e437a57d7c51157696893c5909e9cec42e" + integrity sha512-H4UfQhZyakIjC74I9d34fGYDwk3XpSr17QhEd0Q3I9Xq1CETHo4Hcuo87WyWHpAF1aSLjLRf5lD9ZGX2qStUvg== + dependencies: + array-back "^3.1.0" + find-replace "^3.0.0" + lodash.camelcase "^4.3.0" + typical "^4.0.0" + +command-line-usage@^7.0.0, command-line-usage@^7.0.1: + version "7.0.1" + resolved "https://registry.yarnpkg.com/command-line-usage/-/command-line-usage-7.0.1.tgz#e540afef4a4f3bc501b124ffde33956309100655" + integrity sha512-NCyznE//MuTjwi3y84QVUGEOT+P5oto1e1Pk/jFPVdPPfsG03qpTIl3yw6etR+v73d0lXsoojRpvbru2sqePxQ== + dependencies: + array-back "^6.2.2" + chalk-template "^0.4.0" + table-layout "^3.0.0" + typical "^7.1.1" + commander@2, commander@^2.20.0: version "2.20.3" resolved "https://registry.yarnpkg.com/commander/-/commander-2.20.3.tgz#fd485e84c03eb4881c20722ba48035e8531aeb33" @@ -2037,6 +2121,13 @@ fill-range@^7.1.1: dependencies: to-regex-range "^5.0.1" +find-replace@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/find-replace/-/find-replace-3.0.0.tgz#3e7e23d3b05167a76f770c9fbd5258b0def68c38" + integrity sha512-6Tb2myMioCAgv5kfvP5/PkZZ/ntTpVK39fHY7WkWBgvbeE+VHd/tZuZ4mrC+bxh4cfOZeYKVPaJIZtZXV7GNCQ== + dependencies: + array-back "^3.0.1" + find-up@5.0.0, find-up@^5.0.0: version "5.0.0" resolved "https://registry.yarnpkg.com/find-up/-/find-up-5.0.0.tgz#4c92819ecb7083561e4f4a240a86be5198f536fc" @@ -2059,6 +2150,11 @@ flat@^5.0.2: resolved "https://registry.yarnpkg.com/flat/-/flat-5.0.2.tgz#8ca6fe332069ffa9d324c327198c598259ceb241" integrity sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ== +flatbuffers@^23.5.26: + version "23.5.26" + resolved "https://registry.yarnpkg.com/flatbuffers/-/flatbuffers-23.5.26.tgz#01358e272a61239f0faf3bfbe4e014f3ace9d746" + integrity sha512-vE+SI9vrJDwi1oETtTIFldC/o9GsVKRM+s6EL0nQgxXlYV1Vc4Tk30hj4xGICftInKQKj1F3up2n8UbIVobISQ== + flatted@^3.2.9: version "3.3.1" resolved "https://registry.yarnpkg.com/flatted/-/flatted-3.3.1.tgz#21db470729a6734d4997002f439cb308987f567a" @@ -2498,6 +2594,11 @@ jsdom@^24.0.0: ws "^8.17.0" xml-name-validator "^5.0.0" +json-bignum@^0.0.3: + version "0.0.3" + resolved "https://registry.yarnpkg.com/json-bignum/-/json-bignum-0.0.3.tgz#41163b50436c773d82424dbc20ed70db7604b8d7" + integrity sha512-2WHyXj3OfHSgNyuzDbSxI1w2jgw5gkWSWhS7Qg4bWXx1nLk3jnbwfUeS0PSba3IzpTUWdHxBieELUzXRjQB2zg== + json-buffer@3.0.1: version "3.0.1" resolved "https://registry.yarnpkg.com/json-buffer/-/json-buffer-3.0.1.tgz#9338802a30d3b6605fbe0613e094008ca8c05a13" @@ -2535,6 +2636,16 @@ locate-path@^6.0.0: dependencies: p-locate "^5.0.0" +lodash.assignwith@^4.2.0: + version "4.2.0" + resolved "https://registry.yarnpkg.com/lodash.assignwith/-/lodash.assignwith-4.2.0.tgz#127a97f02adc41751a954d24b0de17e100e038eb" + integrity sha512-ZznplvbvtjK2gMvnQ1BR/zqPFZmS6jbK4p+6Up4xcRYA7yMIwxHCfbTcrYxXKzzqLsQ05eJPVznEW3tuwV7k1g== + +lodash.camelcase@^4.3.0: + version "4.3.0" + resolved "https://registry.yarnpkg.com/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz#b28aa6288a2b9fc651035c7711f65ab6190331a6" + integrity sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA== + lodash.merge@^4.6.2: version "4.6.2" resolved "https://registry.yarnpkg.com/lodash.merge/-/lodash.merge-4.6.2.tgz#558aa53b43b661e1925a0afdfa36a9a1085fe57a" @@ -3188,7 +3299,21 @@ speakingurl@^14.0.1: resolved "https://registry.yarnpkg.com/speakingurl/-/speakingurl-14.0.1.tgz#f37ec8ddc4ab98e9600c1c9ec324a8c48d772a53" integrity sha512-1POYv7uv2gXoyGFpBCmpDVSNV74IfsWlDW216UPjbWufNf+bSU6GdbDsxdcxtfwb4xlI3yxzOTKClUosxARYrQ== -"string-width-cjs@npm:string-width@^4.2.0", "string-width@^1.0.2 || 2 || 3 || 4", string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3: +stream-read-all@^3.0.1: + version "3.0.1" + resolved "https://registry.yarnpkg.com/stream-read-all/-/stream-read-all-3.0.1.tgz#60762ae45e61d93ba0978cda7f3913790052ad96" + integrity sha512-EWZT9XOceBPlVJRrYcykW8jyRSZYbkb/0ZK36uLEmoWVO5gxBOnntNTseNzfREsqxqdfEGQrD8SXQ3QWbBmq8A== + +"string-width-cjs@npm:string-width@^4.2.0": + version "4.2.3" + resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" + integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== + dependencies: + emoji-regex "^8.0.0" + is-fullwidth-code-point "^3.0.0" + strip-ansi "^6.0.1" + +"string-width@^1.0.2 || 2 || 3 || 4", string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3: version "4.2.3" resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== @@ -3213,7 +3338,14 @@ string_decoder@^1.1.1: dependencies: safe-buffer "~5.2.0" -"strip-ansi-cjs@npm:strip-ansi@^6.0.1", strip-ansi@^6.0.0, strip-ansi@^6.0.1: +"strip-ansi-cjs@npm:strip-ansi@^6.0.1": + version "6.0.1" + resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" + integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== + dependencies: + ansi-regex "^5.0.1" + +strip-ansi@^6.0.0, strip-ansi@^6.0.1: version "6.0.1" resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== @@ -3261,6 +3393,19 @@ tabbable@^6.2.0: resolved "https://registry.yarnpkg.com/tabbable/-/tabbable-6.2.0.tgz#732fb62bc0175cfcec257330be187dcfba1f3b97" integrity sha512-Cat63mxsVJlzYvN51JmVXIgNoUokrIaT2zLclCXjRd8boZ0004U4KCs/sToJ75C6sdlByWxpYnb5Boif1VSFew== +table-layout@^3.0.0: + version "3.0.2" + resolved "https://registry.yarnpkg.com/table-layout/-/table-layout-3.0.2.tgz#69c2be44388a5139b48c59cf21e73b488021769a" + integrity sha512-rpyNZYRw+/C+dYkcQ3Pr+rLxW4CfHpXjPDnG7lYhdRoUcZTUt+KEsX+94RGp/aVp/MQU35JCITv2T/beY4m+hw== + dependencies: + "@75lb/deep-merge" "^1.1.1" + array-back "^6.2.2" + command-line-args "^5.2.1" + command-line-usage "^7.0.0" + stream-read-all "^3.0.1" + typical "^7.1.1" + wordwrapjs "^5.1.0" + tar@^6.1.11: version "6.2.1" resolved "https://registry.yarnpkg.com/tar/-/tar-6.2.1.tgz#717549c541bc3c2af15751bea94b1dd068d4b03a" @@ -3346,6 +3491,11 @@ ts-morph@^22.0.0: "@ts-morph/common" "~0.23.0" code-block-writer "^13.0.1" +tslib@^2.4.0, tslib@^2.6.2: + version "2.6.2" + resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.6.2.tgz#703ac29425e7b37cd6fd456e92404d46d1f3e4ae" + integrity sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q== + tsx@^4.7.0: version "4.11.1" resolved "https://registry.yarnpkg.com/tsx/-/tsx-4.11.1.tgz#133adc0e08324553e820a813347e697761339b31" @@ -3373,6 +3523,16 @@ typescript@^5.0.2: resolved "https://registry.yarnpkg.com/typescript/-/typescript-5.4.5.tgz#42ccef2c571fdbd0f6718b1d1f5e6e5ef006f611" integrity sha512-vcI4UpRgg81oIRUFwR0WSIHKt11nJ7SAVlYNIu+QpqeyXP+gpQJy/Z4+F0aGxSE4MqwjyXvW/TzgkLAx2AGHwQ== +typical@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/typical/-/typical-4.0.0.tgz#cbeaff3b9d7ae1e2bbfaf5a4e6f11eccfde94fc4" + integrity sha512-VAH4IvQ7BDFYglMd7BPRDfLgxZZX4O4TFcRDA6EN5X7erNJJq+McIEp8np9aVtxrCJ6qx4GTYVfOWNjcqwZgRw== + +typical@^7.1.1: + version "7.1.1" + resolved "https://registry.yarnpkg.com/typical/-/typical-7.1.1.tgz#ba177ab7ab103b78534463ffa4c0c9754523ac1f" + integrity sha512-T+tKVNs6Wu7IWiAce5BgMd7OZfNYUndHwc5MknN+UHOudi7sGZzuHdCadllRuqJ3fPtgFtIH9+lt9qRv6lmpfA== + undici-types@~5.26.4: version "5.26.5" resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-5.26.5.tgz#bcd539893d00b56e964fd2657a4866b221a65617" @@ -3525,12 +3685,26 @@ word-wrap@^1.2.5: resolved "https://registry.yarnpkg.com/word-wrap/-/word-wrap-1.2.5.tgz#d2c45c6dd4fbce621a66f136cbe328afd0410b34" integrity sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA== +wordwrapjs@^5.1.0: + version "5.1.0" + resolved "https://registry.yarnpkg.com/wordwrapjs/-/wordwrapjs-5.1.0.tgz#4c4d20446dcc670b14fa115ef4f8fd9947af2b3a" + integrity sha512-JNjcULU2e4KJwUNv6CHgI46UvDGitb6dGryHajXTDiLgg1/RiGoPSDw4kZfYnwGtEXf2ZMeIewDQgFGzkCB2Sg== + workerpool@6.2.1: version "6.2.1" resolved "https://registry.yarnpkg.com/workerpool/-/workerpool-6.2.1.tgz#46fc150c17d826b86a008e5a4508656777e9c343" integrity sha512-ILEIE97kDZvF9Wb9f6h5aXK4swSlKGUcOEGiIYb2OOu/IrDU9iwj0fD//SsA6E5ibwJxpEvhullJY4Sl4GcpAw== -"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0", wrap-ansi@^7.0.0: +"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0": + version "7.0.0" + resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" + integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== + dependencies: + ansi-styles "^4.0.0" + string-width "^4.1.0" + strip-ansi "^6.0.0" + +wrap-ansi@^7.0.0: version "7.0.0" resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== From 019a79374608388c846db50bd48a430a95554ccb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Thu, 21 Mar 2024 12:17:34 +0100 Subject: [PATCH 02/21] defer reading the values until they're actually requested (which is often not the case) --- src/options.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.js b/src/options.js index 9fc174b0f1..0097aae721 100644 --- a/src/options.js +++ b/src/options.js @@ -597,7 +597,7 @@ function arrowTableProxy(data) { : prop === "constructor" // for take/map ? Array : typeof prop === "string" && !isNaN(prop) - ? {...target.get(prop)} + ? target.get(prop) : target[prop]; // pass all other properties } }); From 778b8907a3ac05ffd7c9c67650acd6d318c8bb37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Thu, 21 Mar 2024 12:23:08 +0100 Subject: [PATCH 03/21] tests --- test/output/arrowTest.svg | 61 +++++++++++++++ test/output/arrowTestAccessor.svg | 61 +++++++++++++++ test/output/arrowTestBin.svg | 116 +++++++++++++++++++++++++++ test/output/arrowTestGroup.svg | 126 ++++++++++++++++++++++++++++++ 4 files changed, 364 insertions(+) create mode 100644 test/output/arrowTest.svg create mode 100644 test/output/arrowTestAccessor.svg create mode 100644 test/output/arrowTestBin.svg create mode 100644 test/output/arrowTestGroup.svg diff --git a/test/output/arrowTest.svg b/test/output/arrowTest.svg new file mode 100644 index 0000000000..acc3317162 --- /dev/null +++ b/test/output/arrowTest.svg @@ -0,0 +1,61 @@ + + + + + 0 + 5 + 10 + 15 + 20 + 25 + 30 + 35 + 40 + 45 + + + ↑ age + + + + Alice + Bob + Charlie + + + name + + + + + + + \ No newline at end of file diff --git a/test/output/arrowTestAccessor.svg b/test/output/arrowTestAccessor.svg new file mode 100644 index 0000000000..3fb14f8d58 --- /dev/null +++ b/test/output/arrowTestAccessor.svg @@ -0,0 +1,61 @@ + + + + + 0 + 5 + 10 + 15 + 20 + 25 + 30 + 35 + 40 + 45 + + + ↑ age + + + + Alice + Bob + Charlie + + + name + + + + + + + \ No newline at end of file diff --git a/test/output/arrowTestBin.svg b/test/output/arrowTestBin.svg new file mode 100644 index 0000000000..4d43d4dc7b --- /dev/null +++ b/test/output/arrowTestBin.svg @@ -0,0 +1,116 @@ + + + + + 0 + 5,000 + 10,000 + 15,000 + 20,000 + 25,000 + 30,000 + 35,000 + 40,000 + 45,000 + 50,000 + 55,000 + 60,000 + + + ↑ Frequency + + + + 0 + 2 + 4 + 6 + 8 + 10 + 12 + + + vector → + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/test/output/arrowTestGroup.svg b/test/output/arrowTestGroup.svg new file mode 100644 index 0000000000..77e741032b --- /dev/null +++ b/test/output/arrowTestGroup.svg @@ -0,0 +1,126 @@ + + + + + 0 + 5,000 + 10,000 + 15,000 + 20,000 + 25,000 + 30,000 + 35,000 + 40,000 + 45,000 + 50,000 + 55,000 + 60,000 + + + ↑ Frequency + + + + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + + + vector + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file From 932186c89ddcdc4adc73264407fba67be4bc8bc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Thu, 21 Mar 2024 13:44:59 +0100 Subject: [PATCH 04/21] comment --- src/options.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/options.js b/src/options.js index 0097aae721..0a909c2c9e 100644 --- a/src/options.js +++ b/src/options.js @@ -587,8 +587,8 @@ function columnar(data, name, type) { return isArrowTable(data) ? maybeTypedArrayify(data.getChild(name), type) : maybeTypedMap(data, field(name), type); } -// “Arrayify” but for Arrow tables. We try to avoid materializing the values, -// but the Proxy might be used by the group reducer to construct groupData. +// Arrayify arrow tables. We try to avoid materializing the values, but the +// Proxy might be used by the group reducer to construct groupData. function arrowTableProxy(data) { return new Proxy(data, { get(target, prop) { From 1f5ded90e511dd01134b58d96c7139af517de627 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Wed, 26 Jun 2024 16:55:56 +0200 Subject: [PATCH 05/21] fix apache arrow dates (alternative to #2096) --- package.json | 2 +- src/options.js | 8 +++++- test/plots/arrow-dates.ts | 9 +++++++ test/plots/index.ts | 1 + yarn.lock | 52 +++++++++++++++++++-------------------- 5 files changed, 44 insertions(+), 28 deletions(-) create mode 100644 test/plots/arrow-dates.ts diff --git a/package.json b/package.json index 918dd27058..ae95183e6b 100644 --- a/package.json +++ b/package.json @@ -56,7 +56,7 @@ "@types/node": "^20.5.0", "@typescript-eslint/eslint-plugin": "^7.2.0", "@typescript-eslint/parser": "^7.2.0", - "apache-arrow": "^15.0.2", + "apache-arrow": "^16.0.2", "c8": "^9.1.0", "canvas": "^2.0.0", "d3-geo-projection": "^4.0.0", diff --git a/src/options.js b/src/options.js index 0a909c2c9e..9e543e42ca 100644 --- a/src/options.js +++ b/src/options.js @@ -584,7 +584,13 @@ function isArrowTable(data) { // Extract columnar data function columnar(data, name, type) { - return isArrowTable(data) ? maybeTypedArrayify(data.getChild(name), type) : maybeTypedMap(data, field(name), type); + if (isArrowTable(data)) { + const column = maybeTypedArrayify(data.getChild(name), type); + if (Array.isArray(column) && String(data.schema?.fields?.find((d) => d.name === name)).endsWith("")) + column.find((d, i) => d != null && (column[i] = new Date(d))); + return column; + } + return maybeTypedMap(data, field(name), type); } // Arrayify arrow tables. We try to avoid materializing the values, but the diff --git a/test/plots/arrow-dates.ts b/test/plots/arrow-dates.ts new file mode 100644 index 0000000000..52bb49b221 --- /dev/null +++ b/test/plots/arrow-dates.ts @@ -0,0 +1,9 @@ +import * as Plot from "@observablehq/plot"; +import * as Arrow from "apache-arrow"; +import * as d3 from "d3"; + +export async function arrowDates() { + const athletes = await d3.csv("data/athletes.csv", d3.autoType); + const table = Arrow.tableFromJSON(athletes); + return Plot.rectY(table, Plot.binX(undefined, {x: "date_of_birth"})).plot(); +} diff --git a/test/plots/index.ts b/test/plots/index.ts index e224ca5c1b..6920b1ba0b 100644 --- a/test/plots/index.ts +++ b/test/plots/index.ts @@ -12,6 +12,7 @@ export * from "./anscombe-quartet.js"; export * from "./arc.js"; export * from "./armadillo.js"; export * from "./arrow.js"; +export * from "./arrow-dates.js"; export * from "./aspectRatio.js"; export * from "./athletes-bins-colors.js"; export * from "./athletes-birthdays.js"; diff --git a/yarn.lock b/yarn.lock index 92b4defaa4..55cd82b36b 100644 --- a/yarn.lock +++ b/yarn.lock @@ -625,10 +625,10 @@ dependencies: shiki "1.6.2" -"@swc/helpers@^0.5.2": - version "0.5.7" - resolved "https://registry.yarnpkg.com/@swc/helpers/-/helpers-0.5.7.tgz#36c05f61b412abcff3616ecc8634623bcc7c9618" - integrity sha512-BVvNZhx362+l2tSwSuyEUV4h7+jk9raNdoTSdLfwTshXJSaGmYKluGRJznziCI3KX02Z19DdsQrdfrpXAU3Hfg== +"@swc/helpers@^0.5.10": + version "0.5.11" + resolved "https://registry.yarnpkg.com/@swc/helpers/-/helpers-0.5.11.tgz#5bab8c660a6e23c13b2d23fcd1ee44a2db1b0cb7" + integrity sha512-YNlnKRWF2sVojTpIyzwou9XoTNbzbzONwRhOoniEioF1AtaitTvVZblaQRrAzChWQ1bLYyYSWzM18y4WwgzJ+A== dependencies: tslib "^2.4.0" @@ -642,12 +642,12 @@ mkdirp "^3.0.1" path-browserify "^1.0.1" -"@types/command-line-args@^5.2.1": +"@types/command-line-args@^5.2.3": version "5.2.3" resolved "https://registry.yarnpkg.com/@types/command-line-args/-/command-line-args-5.2.3.tgz#553ce2fd5acf160b448d307649b38ffc60d39639" integrity sha512-uv0aG6R0Y8WHZLTamZwtfsDLVRnOa+n+n5rEvFWL5Na5gZ8V2Teab/duDPFzIIIhs9qizDpcavCusCLJZu62Kw== -"@types/command-line-usage@^5.0.2": +"@types/command-line-usage@^5.0.4": version "5.0.4" resolved "https://registry.yarnpkg.com/@types/command-line-usage/-/command-line-usage-5.0.4.tgz#374e4c62d78fbc5a670a0f36da10235af879a0d5" integrity sha512-BwR5KP3Es/CSht0xqBcUXS3qCAUVXwpRKsV2+arxeb65atasuXG9LykC9Ab10Cw3s2raH92ZqOeILaQbsB2ACg== @@ -900,6 +900,13 @@ resolved "https://registry.yarnpkg.com/@types/mocha/-/mocha-10.0.6.tgz#818551d39113081048bdddbef96701b4e8bb9d1b" integrity sha512-dJvrYWxP/UcXm36Qn36fxhUKu8A/xMRXVT2cliFF1Z7UA9liG5Psj3ezNSZw+5puH2czDXRLcXQxf8JbJt0ejg== +"@types/node@^20.12.7": + version "20.14.9" + resolved "https://registry.yarnpkg.com/@types/node/-/node-20.14.9.tgz#12e8e765ab27f8c421a1820c99f5f313a933b420" + integrity sha512-06OCtnTXtWOZBJlRApleWndH4JsRVs1pDCc8dLSQp+7PpUpX3ePdHyeNSFTeSe7FtKyQkrlPvHwJOW3SLd8Oyg== + dependencies: + undici-types "~5.26.4" + "@types/node@^20.5.0": version "20.14.0" resolved "https://registry.yarnpkg.com/@types/node/-/node-20.14.0.tgz#49ceec7b34f8621470cff44677fa9d461a477f17" @@ -907,13 +914,6 @@ dependencies: undici-types "~5.26.4" -"@types/node@^20.6.0": - version "20.11.30" - resolved "https://registry.yarnpkg.com/@types/node/-/node-20.11.30.tgz#9c33467fc23167a347e73834f788f4b9f399d66f" - integrity sha512-dHM6ZxwlmuZaRmUPfv1p+KrdD1Dci04FbdEm/9wEMouFqxYoFl5aMkt0VMAUtYRQDyYvD41WJLukhq/ha3YuTw== - dependencies: - undici-types "~5.26.4" - "@types/resolve@1.20.2": version "1.20.2" resolved "https://registry.yarnpkg.com/@types/resolve/-/resolve-1.20.2.tgz#97d26e00cd4a0423b4af620abecf3e6f442b7975" @@ -1250,18 +1250,18 @@ anymatch@~3.1.2: normalize-path "^3.0.0" picomatch "^2.0.4" -apache-arrow@^15.0.2: - version "15.0.2" - resolved "https://registry.yarnpkg.com/apache-arrow/-/apache-arrow-15.0.2.tgz#d87c6447d64d6fab34aa70119362680b6617ce63" - integrity sha512-RvwlFxLRpO405PLGffx4N2PYLiF7FD86Q1hHl6J2XCWiq+tTCzpb9ngFw0apFDcXZBMpCzMuwAvA7hjyL1/73A== +apache-arrow@^16.0.2: + version "16.1.0" + resolved "https://registry.yarnpkg.com/apache-arrow/-/apache-arrow-16.1.0.tgz#7aa8d0d436dd0995d9dc5c36febf380d5b207209" + integrity sha512-G6GiM6tzPDdGnKUnVkvVr1Nt5+hUaCMBISiasMSiJwI5L5GKDv5Du7Avc2kxlFfB/LEK2LTqh2GKSxutMdf8vQ== dependencies: - "@swc/helpers" "^0.5.2" - "@types/command-line-args" "^5.2.1" - "@types/command-line-usage" "^5.0.2" - "@types/node" "^20.6.0" + "@swc/helpers" "^0.5.10" + "@types/command-line-args" "^5.2.3" + "@types/command-line-usage" "^5.0.4" + "@types/node" "^20.12.7" command-line-args "^5.2.1" command-line-usage "^7.0.1" - flatbuffers "^23.5.26" + flatbuffers "^24.3.25" json-bignum "^0.0.3" tslib "^2.6.2" @@ -2150,10 +2150,10 @@ flat@^5.0.2: resolved "https://registry.yarnpkg.com/flat/-/flat-5.0.2.tgz#8ca6fe332069ffa9d324c327198c598259ceb241" integrity sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ== -flatbuffers@^23.5.26: - version "23.5.26" - resolved "https://registry.yarnpkg.com/flatbuffers/-/flatbuffers-23.5.26.tgz#01358e272a61239f0faf3bfbe4e014f3ace9d746" - integrity sha512-vE+SI9vrJDwi1oETtTIFldC/o9GsVKRM+s6EL0nQgxXlYV1Vc4Tk30hj4xGICftInKQKj1F3up2n8UbIVobISQ== +flatbuffers@^24.3.25: + version "24.3.25" + resolved "https://registry.yarnpkg.com/flatbuffers/-/flatbuffers-24.3.25.tgz#e2f92259ba8aa53acd0af7844afb7c7eb95e7089" + integrity sha512-3HDgPbgiwWMI9zVB7VYBHaMrbOO7Gm0v+yD2FV/sCKj+9NDeVL7BOBYUuhWAQGKWOzBo8S9WdMvV0eixO233XQ== flatted@^3.2.9: version "3.3.1" From 4e729d0bab1c8f6cb583859746f39a18004a6dfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Wed, 26 Jun 2024 17:01:32 +0200 Subject: [PATCH 06/21] add test snapshot --- test/output/arrowDates.svg | 120 +++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 test/output/arrowDates.svg diff --git a/test/output/arrowDates.svg b/test/output/arrowDates.svg new file mode 100644 index 0000000000..7884b8522a --- /dev/null +++ b/test/output/arrowDates.svg @@ -0,0 +1,120 @@ + + + + + 0 + 100 + 200 + 300 + 400 + 500 + 600 + 700 + 800 + 900 + + + ↑ Frequency + + + + 1955 + 1960 + 1965 + 1970 + 1975 + 1980 + 1985 + 1990 + 1995 + 2000 + + + date_of_birth → + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file From f045315595b634849f062d393c95874989a26819 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Wed, 26 Jun 2024 17:08:35 +0200 Subject: [PATCH 07/21] fix test wrt https://github.com/apache/arrow/issues/40718 --- test/output/arrowDates.svg | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/test/output/arrowDates.svg b/test/output/arrowDates.svg index 7884b8522a..de1e69765f 100644 --- a/test/output/arrowDates.svg +++ b/test/output/arrowDates.svg @@ -68,22 +68,23 @@ date_of_birth → - - + + + - + - + - + - + - + - - + + From c6704e48ae643b7b04403aa929c3cf90b03a173c Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Sun, 28 Jul 2024 09:32:11 -0400 Subject: [PATCH 08/21] arrow table data; fix BigInt coercion --- src/mark.d.ts | 1 + src/mark.js | 10 +++++--- src/options.js | 66 ++++++++++++++++++++++++-------------------------- 3 files changed, 39 insertions(+), 38 deletions(-) diff --git a/src/mark.d.ts b/src/mark.d.ts index 54f14276ca..f3dd42bd7a 100644 --- a/src/mark.d.ts +++ b/src/mark.d.ts @@ -32,6 +32,7 @@ export type TipPointer = "x" | "y" | "xy"; * * - an array, typed array, or other iterable * - an object with a length property and indexed values + * - an Apache Arrow Table */ export type Data = Iterable | ArrayLike; diff --git a/src/mark.js b/src/mark.js index 768105025e..a5c0788b9e 100644 --- a/src/mark.js +++ b/src/mark.js @@ -2,7 +2,7 @@ import {channelDomain, createChannels, valueObject} from "./channel.js"; import {defined} from "./defined.js"; import {maybeFacetAnchor} from "./facet.js"; import {maybeClip, maybeNamed, maybeValue} from "./options.js"; -import {arrayify, isDomainSort, isObject, isOptions, keyword, range, singleton} from "./options.js"; +import {arrayify, isArrowTable, isDomainSort, isObject, isOptions, keyword, range, singleton} from "./options.js"; import {project} from "./projection.js"; import {styles} from "./style.js"; import {basic, initializer} from "./transforms/basic.js"; @@ -87,10 +87,10 @@ export class Mark { } } initialize(facets, facetChannels, plotOptions) { - let data = arrayify(this.data); + let data = dataify(this.data); if (facets === undefined && data != null) facets = [range(data)]; const originalFacets = facets; - if (this.transform != null) ({facets, data} = this.transform(data, facets, plotOptions)), (data = arrayify(data)); + if (this.transform != null) ({facets, data} = this.transform(data, facets, plotOptions)), (data = dataify(data)); if (facets !== undefined) facets.original = originalFacets; // needed to read facetChannels const channels = createChannels(this.channels, data); if (this.sort != null) channelDomain(data, facets, channels, facetChannels, this.sort); // mutates facetChannels! @@ -130,6 +130,10 @@ export class Mark { } } +function dataify(data) { + return isArrowTable(data) ? data : arrayify(data); +} + export function marks(...marks) { marks.plot = Mark.prototype.plot; return marks; diff --git a/src/options.js b/src/options.js index 9e543e42ca..caabd898c9 100644 --- a/src/options.js +++ b/src/options.js @@ -7,6 +7,22 @@ import {timeInterval, utcInterval} from "./time.js"; export const TypedArray = Object.getPrototypeOf(Uint8Array); const objectToString = Object.prototype.toString; +function isNumberArray(value) { + return value instanceof TypedArray && !isBigIntArray(value); +} + +function isNumberType(type) { + return type?.prototype instanceof TypedArray && !isBigIntType(type); +} + +function isBigIntArray(value) { + return value instanceof BigInt64Array || value instanceof BigUint64Array; +} + +function isBigIntType(type) { + return type === BigInt64Array || type === BigUint64Array; +} + // If a reindex is attached to the data, channel values expressed as arrays will // be reindexed when the channels are instantiated. See exclusiveFacets. export const reindex = Symbol("reindex"); @@ -14,7 +30,9 @@ export const reindex = Symbol("reindex"); export function valueof(data, value, type) { const valueType = typeof value; return valueType === "string" - ? columnar(data, value, type) + ? isArrowTable(data) + ? maybeTypedArrayify(data.getChild(value).toArray(), type) // TODO retain date/type hint? + : maybeTypedMap(data, field(value), type) : valueType === "function" ? maybeTypedMap(data, value, type) : valueType === "number" || value instanceof Date || valueType === "boolean" @@ -29,7 +47,7 @@ function maybeTake(values, index) { } function maybeTypedMap(data, f, type) { - return map(data, type?.prototype instanceof TypedArray ? floater(f) : f, type); + return map(data, isNumberType(type) ? floater(f) : f, type); // coerce maybe BigInt to Number to avoid error } function maybeTypedArrayify(data, type) { @@ -37,7 +55,7 @@ function maybeTypedArrayify(data, type) { ? arrayify(data) // preserve undefined type : data instanceof type ? data - : type.prototype instanceof TypedArray && !(data instanceof TypedArray) + : isNumberType(type) && !isNumberArray(data) ? type.from(data, coerceNumber) : type.from(data); } @@ -70,7 +88,7 @@ export function percentile(reduce) { // If the values are specified as a typed array, no coercion is required. export function coerceNumbers(values) { - return values instanceof TypedArray ? values : map(values, coerceNumber, Float64Array); + return isNumberArray(values) ? values : map(values, coerceNumber, Float64Array); } // Unlike Mark’s number, here we want to convert null and undefined to NaN since @@ -133,7 +151,6 @@ export function keyword(input, name, allowed) { // Promotes the specified data to an array as needed. export function arrayify(values) { if (values == null || values instanceof Array || values instanceof TypedArray) return values; - if (isArrowTable(values)) return arrowTableProxy(values); switch (values.type) { case "FeatureCollection": return values.features; @@ -577,34 +594,13 @@ export function maybeClip(clip) { return clip; } -// Duck typing Apache Arrow tables -function isArrowTable(data) { - return typeof data?.getChild === "function" && typeof data.numRows === "number" && typeof data.slice === "function"; -} - -// Extract columnar data -function columnar(data, name, type) { - if (isArrowTable(data)) { - const column = maybeTypedArrayify(data.getChild(name), type); - if (Array.isArray(column) && String(data.schema?.fields?.find((d) => d.name === name)).endsWith("")) - column.find((d, i) => d != null && (column[i] = new Date(d))); - return column; - } - return maybeTypedMap(data, field(name), type); -} - -// Arrayify arrow tables. We try to avoid materializing the values, but the -// Proxy might be used by the group reducer to construct groupData. -function arrowTableProxy(data) { - return new Proxy(data, { - get(target, prop) { - return prop === "length" - ? target.numRows - : prop === "constructor" // for take/map - ? Array - : typeof prop === "string" && !isNaN(prop) - ? target.get(prop) - : target[prop]; // pass all other properties - } - }); +// https://github.com/observablehq/stdlib/blob/746ca2e69135df6178e4f3a17244def35d8d6b20/src/arrow.js#L4C1-L17C1 +export function isArrowTable(value) { + return ( + value && + typeof value.getChild === "function" && + typeof value.toArray === "function" && + value.schema && + Array.isArray(value.schema.fields) + ); } From e7ed7b7e005fab188a45c20a8c7a69cdf6f1fea8 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Sun, 28 Jul 2024 09:39:53 -0400 Subject: [PATCH 09/21] more arrow support --- src/options.js | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/options.js b/src/options.js index caabd898c9..a96168910d 100644 --- a/src/options.js +++ b/src/options.js @@ -7,6 +7,10 @@ import {timeInterval, utcInterval} from "./time.js"; export const TypedArray = Object.getPrototypeOf(Uint8Array); const objectToString = Object.prototype.toString; +function isArray(value) { + return value instanceof Array || value instanceof TypedArray; +} + function isNumberArray(value) { return value instanceof TypedArray && !isBigIntArray(value); } @@ -150,7 +154,7 @@ export function keyword(input, name, allowed) { // Promotes the specified data to an array as needed. export function arrayify(values) { - if (values == null || values instanceof Array || values instanceof TypedArray) return values; + if (values == null || isArray(values)) return values; switch (values.type) { case "FeatureCollection": return values.features; @@ -253,20 +257,15 @@ export function maybeZ({z, fill, stroke} = {}) { // Returns a Uint32Array with elements [0, 1, 2, … data.length - 1]. export function range(data) { - const n = data.length; + const n = isArray(data) ? data.length : data.numRows; const r = new Uint32Array(n); for (let i = 0; i < n; ++i) r[i] = i; return r; } -// Returns a filtered range of data given the test function. -export function where(data, test) { - return range(data).filter((i) => test(data[i], i, data)); -} - // Returns an array [values[index[0]], values[index[1]], …]. export function take(values, index) { - return map(index, (i) => values[i], values.constructor); + return isArray(values) ? map(index, (i) => values[i], values.constructor) : map(index, (i) => values.at(i)); } // If f does not take exactly one argument, wraps it in a function that uses take. From fcd3ce69d690f364083951071dae74fe68e252ab Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Sun, 28 Jul 2024 09:50:04 -0400 Subject: [PATCH 10/21] arrow date hint; fix BigInt coercion --- src/options.js | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/options.js b/src/options.js index a96168910d..a160231414 100644 --- a/src/options.js +++ b/src/options.js @@ -35,7 +35,7 @@ export function valueof(data, value, type) { const valueType = typeof value; return valueType === "string" ? isArrowTable(data) - ? maybeTypedArrayify(data.getChild(value).toArray(), type) // TODO retain date/type hint? + ? maybeTypedArrowify(data.getChild(value), type) : maybeTypedMap(data, field(value), type) : valueType === "function" ? maybeTypedMap(data, value, type) @@ -64,6 +64,12 @@ function maybeTypedArrayify(data, type) { : type.from(data); } +function maybeTypedArrowify(vector, type = Array) { + return type === Array && isArrowDateType(vector.type) + ? coerceDates(vector.toArray()) + : maybeTypedArrayify(vector.toArray(), type); +} + function floater(f) { return (d, i) => coerceNumber(f(d, i)); } @@ -117,7 +123,7 @@ export function coerceDate(x) { ? x : typeof x === "string" ? isoParse(x) - : x == null || isNaN((x = +x)) + : x == null || isNaN((x = Number(x))) // allow conversion from BigInt ? undefined : new Date(x); } @@ -603,3 +609,14 @@ export function isArrowTable(value) { Array.isArray(value.schema.fields) ); } + +// Apache Arrow now represents dates as numbers. We currently only support +// implicit coercion to JavaScript Date objects when the numbers represent +// milliseconds since Unix epoch. +function isArrowDateType(type) { + return ( + (type.typeId === 8 || // date + type.typeId === 10) && // timestamp + type.unit === 1 // millisecond + ); +} From 78563779cd5be34585d2d772b7a8a2da7f1e8c6f Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Sun, 28 Jul 2024 10:09:55 -0400 Subject: [PATCH 11/21] inline floater --- src/options.js | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/options.js b/src/options.js index a160231414..2de25c2f2b 100644 --- a/src/options.js +++ b/src/options.js @@ -51,7 +51,7 @@ function maybeTake(values, index) { } function maybeTypedMap(data, f, type) { - return map(data, isNumberType(type) ? floater(f) : f, type); // coerce maybe BigInt to Number to avoid error + return map(data, isNumberType(type) ? (d, i) => coerceNumber(f(d, i)) : f, type); // allow conversion from BigInt } function maybeTypedArrayify(data, type) { @@ -70,10 +70,6 @@ function maybeTypedArrowify(vector, type = Array) { : maybeTypedArrayify(vector.toArray(), type); } -function floater(f) { - return (d, i) => coerceNumber(f(d, i)); -} - export const singleton = [null]; // for data-less decoration marks, e.g. frame export const field = (name) => (d) => { const v = d[name]; return v === undefined && d.type === "Feature" ? d.properties?.[name] : v; }; // prettier-ignore export const indexOf = {transform: range}; From d147e8106fdbdde4c0483bcb4fad9670a2f69f62 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Sun, 28 Jul 2024 10:14:20 -0400 Subject: [PATCH 12/21] shorten slightly --- src/options.js | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/options.js b/src/options.js index 2de25c2f2b..424884b0f3 100644 --- a/src/options.js +++ b/src/options.js @@ -59,9 +59,7 @@ function maybeTypedArrayify(data, type) { ? arrayify(data) // preserve undefined type : data instanceof type ? data - : isNumberType(type) && !isNumberArray(data) - ? type.from(data, coerceNumber) - : type.from(data); + : type.from(data, isNumberType(type) && !isNumberArray(data) ? coerceNumber : undefined); } function maybeTypedArrowify(vector, type = Array) { From 6fc974f29fe92ee3f49d42a4d243447d8985bfb1 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Sun, 28 Jul 2024 13:18:54 -0400 Subject: [PATCH 13/21] valueof tests; better arrow coercion --- src/options.js | 14 ++++++++++++-- test/valueof-test.js | 46 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 test/valueof-test.js diff --git a/src/options.js b/src/options.js index 424884b0f3..7192013823 100644 --- a/src/options.js +++ b/src/options.js @@ -57,13 +57,17 @@ function maybeTypedMap(data, f, type) { function maybeTypedArrayify(data, type) { return type === undefined ? arrayify(data) // preserve undefined type + : isArrowVector(data) + ? maybeTypedArrowify(data, type) : data instanceof type ? data : type.from(data, isNumberType(type) && !isNumberArray(data) ? coerceNumber : undefined); } -function maybeTypedArrowify(vector, type = Array) { - return type === Array && isArrowDateType(vector.type) +function maybeTypedArrowify(vector, type) { + return vector == null + ? vector + : (type === undefined || type === Array) && isArrowDateType(vector.type) ? coerceDates(vector.toArray()) : maybeTypedArrayify(vector.toArray(), type); } @@ -155,6 +159,7 @@ export function keyword(input, name, allowed) { // Promotes the specified data to an array as needed. export function arrayify(values) { if (values == null || isArray(values)) return values; + if (isArrowVector(values)) return maybeTypedArrowify(values); switch (values.type) { case "FeatureCollection": return values.features; @@ -604,11 +609,16 @@ export function isArrowTable(value) { ); } +function isArrowVector(value) { + return value && typeof value.toArray === "function" && value.type; +} + // Apache Arrow now represents dates as numbers. We currently only support // implicit coercion to JavaScript Date objects when the numbers represent // milliseconds since Unix epoch. function isArrowDateType(type) { return ( + type && (type.typeId === 8 || // date type.typeId === 10) && // timestamp type.unit === 1 // millisecond diff --git a/test/valueof-test.js b/test/valueof-test.js new file mode 100644 index 0000000000..6cea3fcbed --- /dev/null +++ b/test/valueof-test.js @@ -0,0 +1,46 @@ +import assert from "node:assert"; +import * as Arrow from "apache-arrow"; +import {valueof} from "../src/index.js"; + +describe("valueof(data, value, type)", () => { + it("allows data to be an Arrow Table", () => { + const data = Arrow.tableFromArrays({a: [1, 2, 3], b: Int32Array.of(4, 5, 6), c: "abc"}); + assert.deepStrictEqual(valueof(data, "a"), Float64Array.of(1, 2, 3)); + assert.deepStrictEqual(valueof(data, "b"), Int32Array.of(4, 5, 6)); + assert.deepStrictEqual(valueof(data, "c"), ["a", "b", "c"]); + }); + it("allows data to be an Arrow Table, returning null if the column doesn’t exist", () => { + const data = Arrow.tableFromArrays({a: [1, 2, 3], b: Int32Array.of(4, 5, 6), c: "abc"}); + assert.deepStrictEqual(valueof(data, "unknown"), null); + assert.deepStrictEqual(valueof(data, "unknown", Array), null); + assert.deepStrictEqual(valueof(data, "unknown", Float64Array), null); + }); + it("allows value to be an Arrow Vector, ignoring data", () => { + const data = Arrow.tableFromArrays({a: [1, 2, 3], b: Int32Array.of(4, 5, 6), c: "abc"}); + assert.deepStrictEqual(valueof(null, data.getChild("a")), Float64Array.of(1, 2, 3)); + assert.deepStrictEqual(valueof(null, data.getChild("b")), Int32Array.of(4, 5, 6)); + assert.deepStrictEqual(valueof(null, data.getChild("c")), ["a", "b", "c"]); + }); + it("returns an array of Date for Arrow Date types, when not specifying a type", () => { + const dates = [2000, 2001, 2002].map((y) => new Date(`${y}`)); + const data = Arrow.tableFromArrays({dates}); + assert.deepStrictEqual(valueof(data, "dates"), dates); + assert.deepStrictEqual(valueof(null, data.getChild("dates")), dates); + }); + it("returns an array of Date for Arrow Date types, when asking for an Array", () => { + const dates = [2000, 2001, 2002].map((y) => new Date(`${y}`)); + const data = Arrow.tableFromArrays({dates}); + assert.deepStrictEqual(valueof(data, "dates", Array), dates); + assert.deepStrictEqual(valueof(null, data.getChild("dates"), Array), dates); + }); + it("returns a typed array for Arrow Date types, when asking for a typed array", () => { + const dates = [2000, 2001, 2002].map((y) => new Date(`${y}`)); + const floats = Float64Array.from(dates); + const bigints = BigInt64Array.from(floats, BigInt); + const data = Arrow.tableFromArrays({dates}); + assert.deepStrictEqual(valueof(data, "dates", Float64Array), floats); + assert.deepStrictEqual(valueof(null, data.getChild("dates"), Float64Array), floats); + assert.deepStrictEqual(valueof(data, "dates", BigInt64Array), bigints); + assert.deepStrictEqual(valueof(null, data.getChild("dates"), BigInt64Array), bigints); + }); +}); From b6521310ed6fccaa3e8bd922647392d385ef70bd Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Sun, 28 Jul 2024 13:26:15 -0400 Subject: [PATCH 14/21] Arrow-aware stack transform --- src/options.js | 6 +++++- src/transforms/exclusiveFacets.js | 4 ++-- src/transforms/stack.js | 4 ++-- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/options.js b/src/options.js index 7192013823..40fa4a8f3b 100644 --- a/src/options.js +++ b/src/options.js @@ -260,9 +260,13 @@ export function maybeZ({z, fill, stroke} = {}) { return z; } +export function lengthof(data) { + return isArray(data) ? data.length : data.numRows; +} + // Returns a Uint32Array with elements [0, 1, 2, … data.length - 1]. export function range(data) { - const n = isArray(data) ? data.length : data.numRows; + const n = lengthof(data); const r = new Uint32Array(n); for (let i = 0; i < n; ++i) r[i] = i; return r; diff --git a/src/transforms/exclusiveFacets.js b/src/transforms/exclusiveFacets.js index facf94bfb8..3a560a065e 100644 --- a/src/transforms/exclusiveFacets.js +++ b/src/transforms/exclusiveFacets.js @@ -1,9 +1,9 @@ -import {reindex, slice} from "../options.js"; +import {lengthof, reindex, slice} from "../options.js"; export function exclusiveFacets(data, facets) { if (facets.length === 1) return {data, facets}; // only one facet; trivially exclusive - const n = data.length; + const n = lengthof(data); const O = new Uint8Array(n); let overlaps = 0; diff --git a/src/transforms/stack.js b/src/transforms/stack.js index 9c96997af0..b18c6ef3c8 100644 --- a/src/transforms/stack.js +++ b/src/transforms/stack.js @@ -2,7 +2,7 @@ import {InternMap, cumsum, greatest, group, groupSort, max, min, rollup, sum} fr import {ascendingDefined, descendingDefined} from "../defined.js"; import {withTip} from "../mark.js"; import {maybeApplyInterval, maybeColumn, maybeZ, maybeZero} from "../options.js"; -import {column, field, mid, one, range, valueof} from "../options.js"; +import {column, field, lengthof, mid, one, range, valueof} from "../options.js"; import {basic} from "./basic.js"; import {exclusiveFacets} from "./exclusiveFacets.js"; @@ -91,7 +91,7 @@ function stack(x, y = one, kx, ky, {offset, order, reverse}, options) { const Y = valueof(data, y, Float64Array); const Z = valueof(data, z); const compare = order && order(data, X, Y, Z); - const n = data.length; + const n = lengthof(data); const Y1 = setY1(new Float64Array(n)); const Y2 = setY2(new Float64Array(n)); const facetstacks = []; From 4faebbc00e73c317823da89a94fac5c7eef0af4e Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Sun, 28 Jul 2024 13:47:00 -0400 Subject: [PATCH 15/21] a few more dataify --- src/mark.js | 6 +----- src/options.js | 7 ++++++- src/plot.js | 8 ++++---- src/transforms/basic.js | 4 ++-- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/mark.js b/src/mark.js index a5c0788b9e..462af1577e 100644 --- a/src/mark.js +++ b/src/mark.js @@ -2,7 +2,7 @@ import {channelDomain, createChannels, valueObject} from "./channel.js"; import {defined} from "./defined.js"; import {maybeFacetAnchor} from "./facet.js"; import {maybeClip, maybeNamed, maybeValue} from "./options.js"; -import {arrayify, isArrowTable, isDomainSort, isObject, isOptions, keyword, range, singleton} from "./options.js"; +import {dataify, isDomainSort, isObject, isOptions, keyword, range, singleton} from "./options.js"; import {project} from "./projection.js"; import {styles} from "./style.js"; import {basic, initializer} from "./transforms/basic.js"; @@ -130,10 +130,6 @@ export class Mark { } } -function dataify(data) { - return isArrowTable(data) ? data : arrayify(data); -} - export function marks(...marks) { marks.plot = Mark.prototype.plot; return marks; diff --git a/src/options.js b/src/options.js index 40fa4a8f3b..934a5966cf 100644 --- a/src/options.js +++ b/src/options.js @@ -156,6 +156,11 @@ export function keyword(input, name, allowed) { return i; } +// Like arrayify, but also allows data to be an Apache Arrow Table. +export function dataify(data) { + return isArrowTable(data) ? data : arrayify(data); +} + // Promotes the specified data to an array as needed. export function arrayify(values) { if (values == null || isArray(values)) return values; @@ -261,7 +266,7 @@ export function maybeZ({z, fill, stroke} = {}) { } export function lengthof(data) { - return isArray(data) ? data.length : data.numRows; + return isArray(data) ? data.length : data?.numRows; } // Returns a Uint32Array with elements [0, 1, 2, … data.length - 1]. diff --git a/src/plot.js b/src/plot.js index a091d8d8a8..23dc93f7f7 100644 --- a/src/plot.js +++ b/src/plot.js @@ -10,7 +10,7 @@ import {axisFx, axisFy, axisX, axisY, gridFx, gridFy, gridX, gridY} from "./mark import {frame} from "./marks/frame.js"; import {tip} from "./marks/tip.js"; import {isColor, isIterable, isNone, isScaleOptions} from "./options.js"; -import {arrayify, map, yes, maybeIntervalTransform, subarray} from "./options.js"; +import {dataify, lengthof, map, yes, maybeIntervalTransform, subarray} from "./options.js"; import {createProjection, getGeometryChannels, hasProjection} from "./projection.js"; import {createScales, createScaleFunctions, autoScaleRange, exposeScales} from "./scales.js"; import {innerDimensions, outerDimensions} from "./scales.js"; @@ -459,7 +459,7 @@ function maybeTopFacet(facet, options) { if (facet == null) return; const {x, y} = facet; if (x == null && y == null) return; - const data = arrayify(facet.data); + const data = dataify(facet.data); if (data == null) throw new Error("missing facet data"); const channels = {}; if (x != null) channels.fx = createChannel(data, {value: x, scale: "fx"}); @@ -478,7 +478,7 @@ function maybeMarkFacet(mark, topFacetState, options) { // here with maybeTopFacet that we could reduce. const {fx, fy} = mark; if (fx != null || fy != null) { - const data = arrayify(mark.data ?? fx ?? fy); + const data = dataify(mark.data ?? fx ?? fy); if (data === undefined) throw new Error(`missing facet data in ${mark.ariaLabel}`); if (data === null) return; // ignore channel definitions if no data is provided TODO this right? const channels = {}; @@ -500,7 +500,7 @@ function maybeMarkFacet(mark, topFacetState, options) { if ( data.length > 0 && (groups.size > 1 || (groups.size === 1 && channels.fx && channels.fy && [...groups][0][1].size > 1)) && - arrayify(mark.data)?.length === data.length + lengthof(dataify(mark.data)) === lengthof(data) ) { warn( `Warning: the ${mark.ariaLabel} mark appears to use faceted data, but isn’t faceted. The mark data has the same length as the facet data and the mark facet option is "auto", but the mark data and facet data are distinct. If this mark should be faceted, set the mark facet option to true; otherwise, suppress this warning by setting the mark facet option to false.` diff --git a/src/transforms/basic.js b/src/transforms/basic.js index 573032fa47..cd846d8ef1 100644 --- a/src/transforms/basic.js +++ b/src/transforms/basic.js @@ -1,6 +1,6 @@ import {randomLcg} from "d3"; import {ascendingDefined, descendingDefined} from "../defined.js"; -import {arrayify, isDomainSort, isOptions, maybeValue, valueof} from "../options.js"; +import {dataify, isDomainSort, isOptions, maybeValue, valueof} from "../options.js"; export function basic({filter: f1, sort: s1, reverse: r1, transform: t1, initializer: i1, ...options} = {}, transform) { // If both t1 and t2 are defined, returns a composite transform that first @@ -40,7 +40,7 @@ function composeTransform(t1, t2) { if (t2 == null) return t1 === null ? undefined : t1; return function (data, facets, plotOptions) { ({data, facets} = t1.call(this, data, facets, plotOptions)); - return t2.call(this, arrayify(data), facets, plotOptions); + return t2.call(this, dataify(data), facets, plotOptions); }; } From 107363d20b3c260e81a8680be95a1bce51fd8082 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Sun, 4 Aug 2024 22:35:14 -0400 Subject: [PATCH 16/21] fix merge conflict --- yarn.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yarn.lock b/yarn.lock index 378ab52bd7..e36d1ea85d 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2150,7 +2150,7 @@ find-replace@^3.0.0: dependencies: array-back "^3.0.1" -find-up@5.0.0, find-up@^5.0.0: +find-up@^5.0.0: version "5.0.0" resolved "https://registry.yarnpkg.com/find-up/-/find-up-5.0.0.tgz#4c92819ecb7083561e4f4a240a86be5198f536fc" integrity sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng== From bd22e0bf3621146811bde692b5c4ba4429c2d467 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Sun, 4 Aug 2024 23:01:21 -0400 Subject: [PATCH 17/21] fix Plot.find and stack customOrder --- src/transforms/group.js | 2 ++ src/transforms/stack.js | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/transforms/group.js b/src/transforms/group.js index 07ae348359..0f9ad95a57 100644 --- a/src/transforms/group.js +++ b/src/transforms/group.js @@ -16,6 +16,7 @@ import { } from "d3"; import {ascendingDefined} from "../defined.js"; import { + arrayify, column, identity, isObject, @@ -444,6 +445,7 @@ export function find(test) { if (typeof test !== "function") throw new Error(`invalid test function: ${test}`); return { reduceIndex(I, V, {data}) { + data = arrayify(data); return V[I.find((i) => test(data[i], i, data))]; } }; diff --git a/src/transforms/stack.js b/src/transforms/stack.js index b18c6ef3c8..5a03bbf84e 100644 --- a/src/transforms/stack.js +++ b/src/transforms/stack.js @@ -2,7 +2,7 @@ import {InternMap, cumsum, greatest, group, groupSort, max, min, rollup, sum} fr import {ascendingDefined, descendingDefined} from "../defined.js"; import {withTip} from "../mark.js"; import {maybeApplyInterval, maybeColumn, maybeZ, maybeZero} from "../options.js"; -import {column, field, lengthof, mid, one, range, valueof} from "../options.js"; +import {arrayify, column, field, lengthof, mid, one, range, valueof} from "../options.js"; import {basic} from "./basic.js"; import {exclusiveFacets} from "./exclusiveFacets.js"; @@ -327,7 +327,10 @@ function orderAccessor(f) { } function orderComparator(f) { - return (data) => (i, j) => f(data[i], data[j]); + return (data) => { + data = arrayify(data); + return (i, j) => f(data[i], data[j]); + }; } function orderGiven(domain) { From d0d0b70249c276802584df02e43b3e51ddf6a3e2 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Mon, 5 Aug 2024 09:44:51 -0400 Subject: [PATCH 18/21] handle Arrow in a few more places --- src/interactions/pointer.js | 7 ++++++- src/options.js | 4 ++-- src/transforms/basic.js | 7 +++++-- src/transforms/group.js | 42 ++++++------------------------------- src/transforms/stack.js | 7 +++---- 5 files changed, 22 insertions(+), 45 deletions(-) diff --git a/src/interactions/pointer.js b/src/interactions/pointer.js index c92addcff7..f0c0f765b0 100644 --- a/src/interactions/pointer.js +++ b/src/interactions/pointer.js @@ -1,5 +1,6 @@ import {pointer as pointof} from "d3"; import {composeRender} from "../mark.js"; +import {isArray} from "../options.js"; import {applyFrameAnchor} from "../style.js"; const states = new WeakMap(); @@ -126,7 +127,11 @@ function pointerK(kx, ky, {x, y, px, py, maxRadius = 40, channels, render, ...op // Dispatch the value. When simultaneously exiting this facet and // entering a new one, prioritize the entering facet. - if (!(i == null && facetState?.size > 1)) context.dispatchValue(i == null ? null : data[i]); + if (!(i == null && facetState?.size > 1)) { + const value = i == null ? null : isArray(data) ? data[i] : data.get(i); + context.dispatchValue(value); + } + return r; } diff --git a/src/options.js b/src/options.js index 934a5966cf..ac9caca472 100644 --- a/src/options.js +++ b/src/options.js @@ -7,7 +7,7 @@ import {timeInterval, utcInterval} from "./time.js"; export const TypedArray = Object.getPrototypeOf(Uint8Array); const objectToString = Object.prototype.toString; -function isArray(value) { +export function isArray(value) { return value instanceof Array || value instanceof TypedArray; } @@ -608,7 +608,7 @@ export function maybeClip(clip) { } // https://github.com/observablehq/stdlib/blob/746ca2e69135df6178e4f3a17244def35d8d6b20/src/arrow.js#L4C1-L17C1 -export function isArrowTable(value) { +function isArrowTable(value) { return ( value && typeof value.getChild === "function" && diff --git a/src/transforms/basic.js b/src/transforms/basic.js index cd846d8ef1..8146ae9381 100644 --- a/src/transforms/basic.js +++ b/src/transforms/basic.js @@ -1,6 +1,7 @@ import {randomLcg} from "d3"; import {ascendingDefined, descendingDefined} from "../defined.js"; -import {dataify, isDomainSort, isOptions, maybeValue, valueof} from "../options.js"; +import {isArray, isDomainSort, isOptions} from "../options.js"; +import {dataify, maybeValue, valueof} from "../options.js"; export function basic({filter: f1, sort: s1, reverse: r1, transform: t1, initializer: i1, ...options} = {}, transform) { // If both t1 and t2 are defined, returns a composite transform that first @@ -101,7 +102,9 @@ function sortTransform(value) { function sortData(compare) { return (data, facets) => { - const compareData = (i, j) => compare(data[i], data[j]); + const compareData = isArray(data) + ? (i, j) => compare(data[i], data[j]) + : (i, j) => compare(data.get(i), data.get(j)); return {data, facets: facets.map((I) => I.slice().sort(compareData))}; }; } diff --git a/src/transforms/group.js b/src/transforms/group.js index 0f9ad95a57..b3320efd65 100644 --- a/src/transforms/group.js +++ b/src/transforms/group.js @@ -1,38 +1,9 @@ -import { - InternSet, - deviation, - group as grouper, - max, - maxIndex, - mean, - median, - min, - minIndex, - mode, - rollup, - sort, - sum, - variance -} from "d3"; +import {InternSet, group as grouper, rollup, sort} from "d3"; +import {deviation, max, maxIndex, mean, median, min, minIndex, mode, sum, variance} from "d3"; import {ascendingDefined} from "../defined.js"; -import { - arrayify, - column, - identity, - isObject, - isTemporal, - labelof, - maybeApplyInterval, - maybeColorChannel, - maybeColumn, - maybeInput, - maybeTuple, - percentile, - range, - second, - take, - valueof -} from "../options.js"; +import {maybeApplyInterval, maybeColorChannel, maybeColumn, maybeInput, maybeTuple} from "../options.js"; +import {isArray, isObject, isTemporal} from "../options.js"; +import {column, identity, labelof, percentile, range, second, take, valueof} from "../options.js"; import {basic} from "./basic.js"; // Group on {z, fill, stroke}. @@ -445,8 +416,7 @@ export function find(test) { if (typeof test !== "function") throw new Error(`invalid test function: ${test}`); return { reduceIndex(I, V, {data}) { - data = arrayify(data); - return V[I.find((i) => test(data[i], i, data))]; + return V[I.find(isArray(data) ? (i) => test(data[i], i, data) : (i) => test(data.get(i), i, data))]; } }; } diff --git a/src/transforms/stack.js b/src/transforms/stack.js index 5a03bbf84e..20d7342e03 100644 --- a/src/transforms/stack.js +++ b/src/transforms/stack.js @@ -2,7 +2,7 @@ import {InternMap, cumsum, greatest, group, groupSort, max, min, rollup, sum} fr import {ascendingDefined, descendingDefined} from "../defined.js"; import {withTip} from "../mark.js"; import {maybeApplyInterval, maybeColumn, maybeZ, maybeZero} from "../options.js"; -import {arrayify, column, field, lengthof, mid, one, range, valueof} from "../options.js"; +import {column, field, isArray, lengthof, mid, one, range, valueof} from "../options.js"; import {basic} from "./basic.js"; import {exclusiveFacets} from "./exclusiveFacets.js"; @@ -252,7 +252,7 @@ function maybeOrder(order, offset, ky) { return orderAccessor(field(order)); } if (typeof order === "function") return (order.length === 1 ? orderAccessor : orderComparator)(order); - if (Array.isArray(order)) return orderGiven(order); + if (isArray(order)) return orderGiven(order); throw new Error(`invalid order: ${order}`); } @@ -328,8 +328,7 @@ function orderAccessor(f) { function orderComparator(f) { return (data) => { - data = arrayify(data); - return (i, j) => f(data[i], data[j]); + return isArray(data) ? (i, j) => f(data[i], data[j]) : (i, j) => f(data.get(i), data.get(j)); }; } From 1f003720e013375c2bebfd292078b9dbb0490c1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 5 Aug 2024 11:04:05 -0400 Subject: [PATCH 19/21] fix tree.js and add various tests --- src/transforms/tree.js | 6 +- test/output/arrowTestCustomOrder.svg | 134 +++++++++++++++++++++++++++ test/output/arrowTestDifferenceY.svg | 78 ++++++++++++++++ test/output/arrowTestSort.svg | 50 ++++++++++ test/output/arrowTestTree.svg | 47 ++++++++++ test/plots/arrow.ts | 97 +++++++++++++++++++ 6 files changed, 410 insertions(+), 2 deletions(-) create mode 100644 test/output/arrowTestCustomOrder.svg create mode 100644 test/output/arrowTestDifferenceY.svg create mode 100644 test/output/arrowTestSort.svg create mode 100644 test/output/arrowTestTree.svg diff --git a/src/transforms/tree.js b/src/transforms/tree.js index 067fc03229..ae88a76592 100644 --- a/src/transforms/tree.js +++ b/src/transforms/tree.js @@ -1,6 +1,6 @@ import {stratify, tree} from "d3"; import {ascendingDefined} from "../defined.js"; -import {column, identity, isObject, one, valueof} from "../options.js"; +import {column, identity, isArray, isObject, one, valueof} from "../options.js"; import {basic} from "./basic.js"; export function treeNode({ @@ -40,7 +40,9 @@ export function treeNode({ for (const o of outputs) o[output_values] = o[output_setValues]([]); for (const facet of facets) { const treeFacet = []; - const root = rootof(facet.filter((i) => P[i] != null)).each((node) => (node.data = data[node.data])); + const root = rootof(facet.filter((i) => P[i] != null)).each( + isArray(data) ? (node) => (node.data = data[node.data]) : (node) => (node.data = data.get(node.data)) + ); if (treeSort != null) root.sort(treeSort); layout(root); for (const node of root.descendants()) { diff --git a/test/output/arrowTestCustomOrder.svg b/test/output/arrowTestCustomOrder.svg new file mode 100644 index 0000000000..0729686f1d --- /dev/null +++ b/test/output/arrowTestCustomOrder.svg @@ -0,0 +1,134 @@ + + + + + + 0 + 2 + 4 + 6 + 8 + 10 + 12 + 14 + 16 + 18 + 20 + 22 + + + ↑ Annual revenue (billions, adj.) + + + + 1975 + 1980 + 1985 + 1990 + 1995 + 2000 + 2005 + 2010 + 2015 + + + 8 - Track + Tape + CD + Disc + CD Single + Disc + Cassette + Tape + Cassette Single + Tape + DVD Audio + Other + Download Album + Download + Download Music Video + Download + Download Single + Download + Kiosk + Other + LP/EP + Vinyl + Limited Tier Paid Subscription + Streaming + Music Video (Physical) + Other + On-Demand Streaming (Ad-Supported) + Streaming + Other Ad-Supported Streaming + Streaming + Other Digital + Download + Other Tapes + Tape + Paid Subscription + Streaming + Ringtones & Ringbacks + Download + SACD + Disc + SoundExchange Distributions + Streaming + Synchronization + Other + Vinyl Single + Vinyl + + + + + \ No newline at end of file diff --git a/test/output/arrowTestDifferenceY.svg b/test/output/arrowTestDifferenceY.svg new file mode 100644 index 0000000000..0ae2fbf577 --- /dev/null +++ b/test/output/arrowTestDifferenceY.svg @@ -0,0 +1,78 @@ + + + + + 1.0 + 1.2 + 1.4 + 1.6 + 1.8 + 2.0 + 2.2 + 2.4 + 2.6 + 2.8 + + + ↑ Close + + + + 2014 + 2015 + 2016 + 2017 + 2018 + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/test/output/arrowTestSort.svg b/test/output/arrowTestSort.svg new file mode 100644 index 0000000000..2dd19e56c0 --- /dev/null +++ b/test/output/arrowTestSort.svg @@ -0,0 +1,50 @@ + + + + + 0 + 10 + 20 + 30 + 40 + 50 + 60 + 70 + 80 + 90 + 100 + + + age → + + + + + + + \ No newline at end of file diff --git a/test/output/arrowTestTree.svg b/test/output/arrowTestTree.svg new file mode 100644 index 0000000000..2ad47dc9c1 --- /dev/null +++ b/test/output/arrowTestTree.svg @@ -0,0 +1,47 @@ + + + + + + + + + + + + + /Chaos + /Chaos/Eros + /Chaos/Erebus + /Chaos/Tartarus + /Chaos/Gaia + /Chaos/Gaia/Mountains + /Chaos/Gaia/Pontus + /Chaos/Gaia/Uranus + + + Eros/Chaos/Eros + Erebus/Chaos/Erebus + Tartarus/Chaos/Tartarus + Mountains/Chaos/Gaia/Mountains + Pontus/Chaos/Gaia/Pontus + Uranus/Chaos/Gaia/Uranus + + + Chaos/Chaos + Gaia/Chaos/Gaia + + \ No newline at end of file diff --git a/test/plots/arrow.ts b/test/plots/arrow.ts index 5699e765fc..97b3c98e6f 100644 --- a/test/plots/arrow.ts +++ b/test/plots/arrow.ts @@ -50,3 +50,100 @@ export async function arrowTestGroup() { const data = Arrow.tableFromArrays({category, vector}); return Plot.barY(data, Plot.groupX({y: "count"}, {x: "vector", fill: "category"})).plot({marginLeft: 60}); } + +/** + * An arrow table dataset supports sorting with a comparator. + */ +export async function arrowTestSort() { + const data = Arrow.tableFromArrays({ + id: [1, 2, 3], + name: ["Alice", "Bob", "Charlie"], + age: [35, 25, 45] + }); + return Plot.barX(data, {x: "age", fill: "name", sort: (a: {age: number}, b: {age: number}) => b.age - a.age}).plot(); +} + +/** + * An arrow table dataset supports accessing the node's datum. + */ +export async function arrowTestTree() { + const gods = Arrow.tableFromArrays({ + branch: `Chaos Gaia Mountains +Chaos Gaia Pontus +Chaos Gaia Uranus +Chaos Eros +Chaos Erebus +Chaos Tartarus` + .split("\n") + .map((d) => d.replace(/\s+/g, "/")) + }); + return Plot.plot({ + axis: null, + insetLeft: 35, + insetTop: 20, + insetBottom: 20, + insetRight: 120, + marks: [Plot.tree(gods, {path: "branch", fill: (d) => d?.branch})] + }); +} + +/** + * An arrow table dataset supports Plot.find. + */ +export async function arrowTestDifferenceY() { + const stocks = Arrow.tableFromJSON(await readStocks()); + return Plot.plot({ + marks: [ + Plot.differenceY( + stocks, + Plot.normalizeY( + Plot.groupX( + {y1: Plot.find((d) => d.Symbol === "GOOG"), y2: Plot.find((d) => d.Symbol === "AAPL")}, + {x: "Date", y: "Close", tip: true} + ) + ) + ) + ] + }); +} + +async function readStocks(start = 0, end = Infinity) { + return ( + await Promise.all( + ["AAPL", "GOOG"].map((symbol) => + d3.csv(`data/${symbol.toLowerCase()}.csv`, (d, i) => + start <= i && i < end ? ((d.Symbol = symbol), d3.autoType(d)) : null + ) + ) + ) + ).flat(); +} + +/** + * An arrow table dataset supports stack custom order. + */ +export async function arrowTestCustomOrder() { + const riaa = Arrow.tableFromJSON(await d3.csv("data/riaa-us-revenue.csv", d3.autoType)); + return Plot.plot({ + y: { + grid: true, + label: "Annual revenue (billions, adj.)", + transform: (d) => d / 1000 + }, + marks: [ + Plot.areaY( + riaa, + Plot.stackY({ + x: "year", + y: "revenue", + z: "format", + order: (a, b) => d3.ascending(a.group, b.group) || d3.descending(a.revenue, b.revenue), + fill: "group", + stroke: "white", + title: (d) => `${d.format}\n${d.group}` + }) + ), + Plot.ruleY([0]) + ] + }); +} From 9c5f7ad4ae9eb72e11b3d484702c9d346f7c658a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 5 Aug 2024 11:12:05 -0400 Subject: [PATCH 20/21] test pointer --- test/output/arrowTestPointer.html | 403 ++++++++++++++++++++++++++++++ test/plots/arrow.ts | 14 ++ 2 files changed, 417 insertions(+) create mode 100644 test/output/arrowTestPointer.html diff --git a/test/output/arrowTestPointer.html b/test/output/arrowTestPointer.html new file mode 100644 index 0000000000..d91b22d848 --- /dev/null +++ b/test/output/arrowTestPointer.html @@ -0,0 +1,403 @@ +
+ + + + 14 + 15 + 16 + 17 + 18 + 19 + 20 + 21 + + + ↑ culmen_depth_mm + + + + 35 + 40 + 45 + 50 + 55 + + + culmen_length_mm → + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
\ No newline at end of file diff --git a/test/plots/arrow.ts b/test/plots/arrow.ts index 97b3c98e6f..19ad4ca28b 100644 --- a/test/plots/arrow.ts +++ b/test/plots/arrow.ts @@ -1,6 +1,7 @@ import * as Plot from "@observablehq/plot"; import * as d3 from "d3"; import * as Arrow from "apache-arrow"; +import {html} from "htl"; /** * An arrow table dataset supports direct (getChild) accessors. @@ -147,3 +148,16 @@ export async function arrowTestCustomOrder() { ] }); } + +/** + * An arrow table dataset works with the pointer. + */ +export async function arrowTestPointer() { + const penguins = Arrow.tableFromJSON(await d3.csv("data/penguins.csv", d3.autoType)); + const plot = Plot.dot(penguins, {x: "culmen_length_mm", y: "culmen_depth_mm", tip: true}).plot(); + const textarea = html`