From 20bc4ff296ec839a9fdeb44c45a4c942b629664e Mon Sep 17 00:00:00 2001 From: Salvatore Previti Date: Wed, 17 May 2023 12:54:28 +0100 Subject: [PATCH] Serialization and deserialization to file, new serialization formats (#53) Add the ability to serialize and deserialize a file directly with serializeFileAsync and deserializeFileAsync methods - those two are faster than loading a file in memory and deserialize/serialize in memory and are fully asynchronous. They use mmap if possible. Adds the ability to serialize to file text format and deserialize from buffer and from file text formats: newline_separated_values, comma_separated_values, tab_separated_values, json_array Adds the ability to serialize and deserialize a binary array of little endian uint32_t values remove COW enabled by default, it could cause issues with asynchronous operations and multi threading - will provide a flag in a new version --- .vscode/settings.json | 78 +- index.d.ts | 175 +- index.js | 37 + package-lock.json | 138 +- package.json | 12 +- roaring-node.cpp | 1519 ++++++++++++++--- src/cpp/RoaringBitmap32-main.h | 4 +- src/cpp/RoaringBitmap32-ranges.h | 1 - src/cpp/RoaringBitmap32-serialization.h | 69 +- src/cpp/RoaringBitmap32.h | 4 +- src/cpp/WorkerError.h | 43 + src/cpp/async-workers.h | 157 +- src/cpp/memory.h | 4 +- src/cpp/mmap.h | 106 ++ src/cpp/serialization-csv.h | 208 +++ src/cpp/serialization-format.h | 82 + src/cpp/serialization.h | 582 +++++-- submodules/CRoaring | 2 +- .../RoaringBitmap32.frozen.test.ts | 14 + ...RoaringBitmap32.serialization-file.test.ts | 180 ++ .../RoaringBitmap32.serialization.test.ts | 77 + test/roaring.test.ts | 49 +- tsconfig.json | 2 +- 23 files changed, 2843 insertions(+), 700 deletions(-) create mode 100644 src/cpp/WorkerError.h create mode 100644 src/cpp/mmap.h create mode 100644 src/cpp/serialization-csv.h create mode 100644 test/RoaringBitmap32/RoaringBitmap32.serialization-file.test.ts diff --git a/.vscode/settings.json b/.vscode/settings.json 
index 24ae60b..98bdbb8 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -77,84 +77,24 @@ }, "files.associations": { "__bit_reference": "cpp", - "__bits": "cpp", - "__config": "cpp", - "__debug": "cpp", - "__errc": "cpp", - "__hash_table": "cpp", - "__locale": "cpp", - "__mutex_base": "cpp", "__node_handle": "cpp", - "__nullptr": "cpp", - "__split_buffer": "cpp", - "__string": "cpp", - "__threading_support": "cpp", - "__tree": "cpp", - "__tuple": "cpp", - "array": "cpp", - "atomic": "cpp", "bitset": "cpp", - "cctype": "cpp", - "chrono": "cpp", - "cinttypes": "cpp", - "clocale": "cpp", - "cmath": "cpp", - "compare": "cpp", - "complex": "cpp", - "concepts": "cpp", - "cstdarg": "cpp", - "cstddef": "cpp", - "cstdint": "cpp", - "cstdio": "cpp", - "cstdlib": "cpp", - "cstring": "cpp", - "ctime": "cpp", - "cwchar": "cpp", - "cwctype": "cpp", "deque": "cpp", - "exception": "cpp", - "fstream": "cpp", - "initializer_list": "cpp", - "iomanip": "cpp", - "ios": "cpp", - "iosfwd": "cpp", - "iostream": "cpp", - "istream": "cpp", + "__memory": "cpp", "limits": "cpp", - "locale": "cpp", - "map": "cpp", - "memory": "cpp", - "mutex": "cpp", - "new": "cpp", - "numeric": "cpp", "optional": "cpp", - "ostream": "cpp", - "queue": "cpp", - "random": "cpp", "ratio": "cpp", - "set": "cpp", - "sstream": "cpp", - "stack": "cpp", - "stdexcept": "cpp", - "streambuf": "cpp", - "string": "cpp", - "string_view": "cpp", "system_error": "cpp", "tuple": "cpp", "type_traits": "cpp", - "typeinfo": "cpp", - "unordered_map": "cpp", - "variant": "cpp", "vector": "cpp", - "algorithm": "cpp", - "roaring.c": "cpp", - "isadetection.c": "cpp", - "array_util.c": "cpp", - "bitset.c": "cpp", - "bitset_util.c": "cpp", - "containers.c": "cpp", - "__verbose_abort": "cpp", - "bit": "cpp", - "filesystem": "cpp" + "chrono": "cpp", + "filesystem": "cpp", + "random": "cpp", + "__config": "cpp", + "__nullptr": "cpp", + "atomic": "cpp", + "locale": "cpp", + "cstddef": "cpp" } } diff --git a/index.d.ts 
b/index.d.ts index bd6fed2..af44488 100644 --- a/index.d.ts +++ b/index.d.ts @@ -157,6 +157,61 @@ export enum SerializationFormat { * */ unsafe_frozen_croaring = "unsafe_frozen_croaring", + + /** + * A plain binary array of 32 bits integers in little endian format. 4 bytes per value. + */ + uint32_array = "uint32_array", +} + +export enum FileSerializationFormat { + /** + * Stable Optimized non portable C/C++ format. Used by croaring. Can be smaller than the portable format. + */ + croaring = "croaring", + + /** + * Stable Portable Java and Go format. + */ + portable = "portable", + + /** + * A plain binary array of 32 bits integers in little endian format. 4 bytes per value. + */ + uint32_array = "uint32_array", + + /** + * Non portable C/C++ frozen format. + * Is considered unsafe and unstable because the format might change at any new version. + * Can be useful for temporary storage or for sending data over the network between similar machines. + * If the content is corrupted when deserialized or when a frozen view is created, the behavior is undefined! + * The application may crash, buffer overrun, could be a vector of attack! + * + * When this option is used in the serialize function, the new returned buffer (if no buffer was provided) will be aligned to a 32 bytes boundary. + * This is required to create a frozen view with the method unsafeFrozenView. + * + */ + unsafe_frozen_croaring = "unsafe_frozen_croaring", + + /** + * Comma separated values, all values are in decimal and in one line without spaces or other characters. + */ + comma_separated_values = "comma_separated_values", + + /** + * Tab "\t" separated values, all values are in decimal and in one line without other characters. + */ + tab_separated_values = "tab_separated_values", + + /** + * Newline (\n) separated values, all values are in decimal and one per line with a terminating newline. 
+ */ + newline_separated_values = "newline_separated_values", + + /** + * A JSON file in the format "[1,2,3,4...]" + */ + json_array = "json_array", } export type SerializationFormatType = @@ -164,8 +219,21 @@ export type SerializationFormatType = | "croaring" | "portable" | "unsafe_frozen_croaring" + | "uint32_array" | boolean; +export type FileSerializationFormatType = + | SerializationFormatType + | FileSerializationFormat + | "comma_separated_values" + | "tab_separated_values" + | "newline_separated_values" + | "json_array"; + +export type SerializationDeserializationFormatType = SerializationFormatType & DeserializationFormatType; + +export type FileSerializationDeserializationFormatType = FileSerializationFormatType & FileDeserializationFormatType; + export enum DeserializationFormat { /** Stable Optimized non portable C/C++ format. Used by croaring. Can be smaller than the portable format. */ croaring = "croaring", @@ -190,16 +258,69 @@ export enum DeserializationFormat { * The application may crash, buffer overrun, could be a vector of attack! */ unsafe_frozen_portable = "unsafe_frozen_portable", + + /** + * A plain binary array of 32 bits integers in little endian format. 4 bytes per value. + */ + uint32_array = "uint32_array", + + comma_separated_values = "comma_separated_values", + tab_separated_values = "tab_separated_values", + newline_separated_values = "newline_separated_values", + json_array = "json_array", } export type DeserializationFormatType = - | SerializationFormat + | DeserializationFormat | "croaring" | "portable" | "unsafe_frozen_croaring" | "unsafe_frozen_portable" + | "uint32_array" + | "comma_separated_values" + | "tab_separated_values" + | "newline_separated_values" + | "json_array" | boolean; +export enum FileDeserializationFormat { + /** Stable Optimized non portable C/C++ format. Used by croaring. Can be smaller than the portable format. */ + croaring = "croaring", + + /** Stable Portable Java and Go format. 
*/ + portable = "portable", + + /** + * Non portable C/C++ frozen format. + * Is considered unsafe and unstable because the format might change at any new version. + * Can be useful for temporary storage or for sending data over the network between similar machines. + * If the content is corrupted when loaded or the buffer is modified when a frozen view is create, the behavior is undefined! + * The application may crash, buffer overrun, could be a vector of attack! + */ + unsafe_frozen_croaring = "unsafe_frozen_croaring", + + /** + * Portable version of the frozen view, compatible with Go and Java. + * Is considered unsafe and unstable because the format might change at any new version. + * Can be useful for temporary storage or for sending data over the network between similar machines. + * If the content is corrupted when loaded or the buffer is modified when a frozen view is create, the behavior is undefined! + * The application may crash, buffer overrun, could be a vector of attack! + */ + unsafe_frozen_portable = "unsafe_frozen_portable", + + /** + * A plain binary array of 32 bits integers in little endian format. 4 bytes per value. + */ + uint32_array = "uint32_array", + + comma_separated_values = "comma_separated_values", + tab_separated_values = "tab_separated_values", + newline_separated_values = "newline_separated_values", + json_array = "json_array", +} + +export type FileDeserializationFormatType = DeserializationFormatType | FileDeserializationFormat; + export enum FrozenViewFormat { /** * Non portable C/C++ frozen format. @@ -1006,6 +1127,18 @@ export interface ReadonlyRoaringBitmap32 extends ReadonlySet { format: SerializationFormatType, ): Promise; + /** + * Serializes the bitmap into a file, asynchronously. + * The bitmap will be temporarily frozen until the operation completes. 
+ * + * This is faster, everything runs in its own thread and it consumes less memory than serializing to a Buffer and then writing it to a file, + * internally it uses memory mapped files and skips all the JS overhead. + * + * @param {FileSerializationFormat | boolean} format One of the FileSerializationFormat enum values, or a boolean value: if false, optimized C/C++ format is used. If true, Java and Go portable format is used. + * @memberof ReadonlyRoaringBitmap32 + */ + serializeFileAsync(filePath: string, format: FileSerializationFormatType): Promise; + /** * Returns a new ReadonlyRoaringBitmap32 that is a copy of this bitmap, same as new ReadonlyRoaringBitmap32(copy) * @@ -1174,7 +1307,6 @@ export interface RoaringBitmap32 extends ReadonlyRoaringBitmap32, Set { * Overwrite the content of this bitmap copying it from an Iterable or another RoaringBitmap32. * * Is faster to pass a Uint32Array instance instead of an array or an iterable. - * * Is even faster if a RoaringBitmap32 instance is used (it performs a simple copy). * * @param {Iterable} values The new values or a RoaringBitmap32 instance. 
@@ -1476,9 +1608,17 @@ export class RoaringBitmap32 { public readonly SerializationFormat: typeof SerializationFormat; - public static readonly DeserializationFormat: typeof SerializationFormat; + public static readonly FileSerializationFormat: typeof FileSerializationFormat; + + public readonly FileSerializationFormat: typeof FileSerializationFormat; + + public static readonly FileDeserializationFormat: typeof FileDeserializationFormat; - public readonly DeserializationFormat: typeof SerializationFormat; + public readonly FileDeserializationFormat: typeof FileDeserializationFormat; + + public static readonly DeserializationFormat: typeof DeserializationFormat; + + public readonly DeserializationFormat: typeof DeserializationFormat; public static readonly FrozenViewFormat: typeof FrozenViewFormat; @@ -1735,12 +1875,10 @@ export class RoaringBitmap32 { * * Returns a Promise that resolves to a new RoaringBitmap32 instance. * - * Setting the portable flag to false enable a custom format that can save space compared to the portable format (e.g., for very sparse bitmaps). * The portable version is meant to be compatible with Java and Go versions. + * The croaring version is compatible with the C version, it can be smaller than the portable version. * When a frozen format is used, the buffer will be copied and the bitmap will be frozen. * - * NOTE: portable argument was optional before, now is required and an Error is thrown if the portable flag is not passed. - * * @static * @param {Uint8Array | Uint8ClampedArray | Int8Array | ArrayBuffer| SharedArrayBuffer | null | undefined} serialized An Uint8Array or a node Buffer that contains the serialized data. * @param {DeserializationFormatType} format The format of the serialized data. true means "portable". false means "croaring". @@ -1758,12 +1896,10 @@ export class RoaringBitmap32 { * * When deserialization is completed or failed, the given callback will be executed. 
* - * Setting the portable flag to false enable a custom format that can save space compared to the portable format (e.g., for very sparse bitmaps). * The portable version is meant to be compatible with Java and Go versions. + * The croaring version is compatible with the C version, it can be smaller than the portable version. * When a frozen format is used, the buffer will be copied and the bitmap will be frozen. * - * NOTE: portable argument was optional before, now is required and an Error is thrown if the portable flag is not passed. - * * @static * @param {Uint8Array | Uint8ClampedArray | Int8Array | ArrayBuffer| SharedArrayBuffer | null | undefined} serialized An Uint8Array or a node Buffer that contains the. * @param {DeserializationFormatType} format The format of the serialized data. true means "portable". false means "croaring". @@ -1777,6 +1913,25 @@ export class RoaringBitmap32 { callback: RoaringBitmap32Callback, ): void; + /** + * Deserializes the bitmap from a file asynchronously. + * Returns a new RoaringBitmap32 instance. + * + * The portable version is meant to be compatible with Java and Go versions. + * The croaring version is compatible with the C version, it can be smaller than the portable version. + * When a frozen format is used, the buffer will be copied and the bitmap will be frozen. + * + * This is faster, everything runs in its own thread and it consumes less memory than reading the file into a Buffer and then deserializing it, + * internally it uses memory mapped files and skips all the JS overhead. + * + * @static + * @param {string} filePath The path of the file to read. + * @param {FileDeserializationFormatType} format The format of the serialized data. true means "portable". false means "croaring". + * @returns {Promise} A promise that resolves to a new RoaringBitmap32 instance. 
+ * @memberof RoaringBitmap32 + */ + public static deserializeFileAsync(filePath: string, format: FileDeserializationFormatType): Promise; + /** * * Deserializes many bitmaps from an array of Uint8Array or an array of Buffer asynchronously in multiple parallel threads. diff --git a/index.js b/index.js index 39f1199..99fed59 100644 --- a/index.js +++ b/index.js @@ -368,6 +368,22 @@ if (!roaring[initializedSym]) { croaring: "croaring", portable: "portable", unsafe_frozen_croaring: "unsafe_frozen_croaring", + uint32_array: "uint32_array", + }, + false, + ); + + defineValue( + "FileSerializationFormat", + { + croaring: "croaring", + portable: "portable", + unsafe_frozen_croaring: "unsafe_frozen_croaring", + uint32_array: "uint32_array", + comma_separated_values: "comma_separated_values", + tab_separated_values: "tab_separated_values", + newline_separated_values: "newline_separated_values", + json_array: "json_array", }, false, ); @@ -379,6 +395,27 @@ if (!roaring[initializedSym]) { portable: "portable", unsafe_frozen_croaring: "unsafe_frozen_croaring", unsafe_frozen_portable: "unsafe_frozen_portable", + uint32_array: "uint32_array", + comma_separated_values: "comma_separated_values", + tab_separated_values: "tab_separated_values", + newline_separated_values: "newline_separated_values", + json_array: "json_array", + }, + false, + ); + + defineValue( + "FileDeserializationFormat", + { + croaring: "croaring", + portable: "portable", + unsafe_frozen_croaring: "unsafe_frozen_croaring", + unsafe_frozen_portable: "unsafe_frozen_portable", + uint32_array: "uint32_array", + comma_separated_values: "comma_separated_values", + tab_separated_values: "tab_separated_values", + newline_separated_values: "newline_separated_values", + json_array: "json_array", }, false, ); diff --git a/package-lock.json b/package-lock.json index 53e9205..b6301fc 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "roaring", - "version": "2.1.0", + "version": "2.1.1", 
"lockfileVersion": 3, "requires": true, "packages": { "": { "name": "roaring", - "version": "2.1.0", + "version": "2.1.1", "hasInstallScript": true, "license": "Apache-2.0", "dependencies": { @@ -14,13 +14,13 @@ }, "devDependencies": { "@balsamic/eslint-config": "^0.4.1", - "@octokit/rest": "^19.0.7", + "@octokit/rest": "^19.0.8", "@types/chai": "^4.3.5", "@types/chai-as-promised": "^7.1.5", "@types/mocha": "^10.0.1", - "@types/node": "^20.1.4", - "@typescript-eslint/eslint-plugin": "^5.59.5", - "@typescript-eslint/parser": "^5.59.5", + "@types/node": "^20.1.5", + "@typescript-eslint/eslint-plugin": "^5.59.6", + "@typescript-eslint/parser": "^5.59.6", "benchmark": "^2.1.4", "chai": "^4.3.7", "chai-as-promised": "^7.1.1", @@ -352,18 +352,18 @@ } }, "node_modules/@octokit/openapi-types": { - "version": "17.0.0", - "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-17.0.0.tgz", - "integrity": "sha512-V8BVJGN0ZmMlURF55VFHFd/L92XQQ43KvFjNmY1IYbCN3V/h/uUFV6iQi19WEHM395Nn+1qhUbViCAD/1czzog==", + "version": "17.1.2", + "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-17.1.2.tgz", + "integrity": "sha512-OaS7Ol4Y+U50PbejfzQflGWRMxO04nYWO5ZBv6JerqMKE2WS/tI9VoVDDPXHBlRMGG2fOdKwtVGlFfc7AVIstw==", "dev": true }, "node_modules/@octokit/plugin-paginate-rest": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/@octokit/plugin-paginate-rest/-/plugin-paginate-rest-6.0.0.tgz", - "integrity": "sha512-Sq5VU1PfT6/JyuXPyt04KZNVsFOSBaYOAq2QRZUwzVlI10KFvcbUo8lR258AAQL1Et60b0WuVik+zOWKLuDZxw==", + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/@octokit/plugin-paginate-rest/-/plugin-paginate-rest-6.1.0.tgz", + "integrity": "sha512-5T4iXjJdYCVA1rdWS1C+uZV9AvtZY9QgTG74kFiSFVj94dZXowyi/YK8f4SGjZaL69jZthGlBaDKRdCMCF9log==", "dev": true, "dependencies": { - "@octokit/types": "^9.0.0" + "@octokit/types": "^9.2.2" }, "engines": { "node": ">= 14" @@ -382,12 +382,12 @@ } }, 
"node_modules/@octokit/plugin-rest-endpoint-methods": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/@octokit/plugin-rest-endpoint-methods/-/plugin-rest-endpoint-methods-7.0.1.tgz", - "integrity": "sha512-pnCaLwZBudK5xCdrR823xHGNgqOzRnJ/mpC/76YPpNP7DybdsJtP7mdOwh+wYZxK5jqeQuhu59ogMI4NRlBUvA==", + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/@octokit/plugin-rest-endpoint-methods/-/plugin-rest-endpoint-methods-7.1.0.tgz", + "integrity": "sha512-SWwz/hc47GaKJR6BlJI4WIVRodbAFRvrR0QRPSoPMs7krb7anYPML3psg+ThEz/kcwOdSNh/oA8qThi/Wvs4Fw==", "dev": true, "dependencies": { - "@octokit/types": "^9.0.0", + "@octokit/types": "^9.2.2", "deprecation": "^2.3.1" }, "engines": { @@ -429,27 +429,27 @@ } }, "node_modules/@octokit/rest": { - "version": "19.0.7", - "resolved": "https://registry.npmjs.org/@octokit/rest/-/rest-19.0.7.tgz", - "integrity": "sha512-HRtSfjrWmWVNp2uAkEpQnuGMJsu/+dBr47dRc5QVgsCbnIc1+GFEaoKBWkYG+zjrsHpSqcAElMio+n10c0b5JA==", + "version": "19.0.8", + "resolved": "https://registry.npmjs.org/@octokit/rest/-/rest-19.0.8.tgz", + "integrity": "sha512-/PKrzqn+zDzXKwBMwLI2IKrvk8yv8cedJOdcmxrjR3gmu6UIzURhP5oQj+4qkn7+uQi1gg7QqV4SqlaQ1HYW1Q==", "dev": true, "dependencies": { "@octokit/core": "^4.1.0", - "@octokit/plugin-paginate-rest": "^6.0.0", + "@octokit/plugin-paginate-rest": "^6.1.0", "@octokit/plugin-request-log": "^1.0.4", - "@octokit/plugin-rest-endpoint-methods": "^7.0.0" + "@octokit/plugin-rest-endpoint-methods": "^7.1.0" }, "engines": { "node": ">= 14" } }, "node_modules/@octokit/types": { - "version": "9.1.2", - "resolved": "https://registry.npmjs.org/@octokit/types/-/types-9.1.2.tgz", - "integrity": "sha512-LPbJIuu1WNoRHbN4UMysEdlissRFpTCWyoKT7kHPufI8T+XX33/qilfMWJo3mCOjNIKu0+43oSQPf+HJa0+TTQ==", + "version": "9.2.2", + "resolved": "https://registry.npmjs.org/@octokit/types/-/types-9.2.2.tgz", + "integrity": "sha512-9BjDxjgQIvCjNWZsbqyH5QC2Yni16oaE6xL+8SUBMzcYPF4TGQBXGA97Cl3KceK9mwiNMb1mOYCz6FbCCLEL+g==", "dev": true, 
"dependencies": { - "@octokit/openapi-types": "^17.0.0" + "@octokit/openapi-types": "^17.1.2" } }, "node_modules/@tootallnate/once": { @@ -525,9 +525,9 @@ "dev": true }, "node_modules/@types/node": { - "version": "20.1.4", - "resolved": "https://registry.npmjs.org/@types/node/-/node-20.1.4.tgz", - "integrity": "sha512-At4pvmIOki8yuwLtd7BNHl3CiWNbtclUbNtScGx4OHfBd4/oWoJC8KRCIxXwkdndzhxOsPXihrsOoydxBjlE9Q==", + "version": "20.1.5", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.1.5.tgz", + "integrity": "sha512-IvGD1CD/nego63ySR7vrAKEX3AJTcmrAN2kn+/sDNLi1Ff5kBzDeEdqWDplK+0HAEoLYej137Sk0cUU8OLOlMg==", "dev": true }, "node_modules/@types/semver": { @@ -537,15 +537,15 @@ "dev": true }, "node_modules/@typescript-eslint/eslint-plugin": { - "version": "5.59.5", - "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-5.59.5.tgz", - "integrity": "sha512-feA9xbVRWJZor+AnLNAr7A8JRWeZqHUf4T9tlP+TN04b05pFVhO5eN7/O93Y/1OUlLMHKbnJisgDURs/qvtqdg==", + "version": "5.59.6", + "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-5.59.6.tgz", + "integrity": "sha512-sXtOgJNEuRU5RLwPUb1jxtToZbgvq3M6FPpY4QENxoOggK+UpTxUBpj6tD8+Qh2g46Pi9We87E+eHnUw8YcGsw==", "dev": true, "dependencies": { "@eslint-community/regexpp": "^4.4.0", - "@typescript-eslint/scope-manager": "5.59.5", - "@typescript-eslint/type-utils": "5.59.5", - "@typescript-eslint/utils": "5.59.5", + "@typescript-eslint/scope-manager": "5.59.6", + "@typescript-eslint/type-utils": "5.59.6", + "@typescript-eslint/utils": "5.59.6", "debug": "^4.3.4", "grapheme-splitter": "^1.0.4", "ignore": "^5.2.0", @@ -571,14 +571,14 @@ } }, "node_modules/@typescript-eslint/parser": { - "version": "5.59.5", - "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-5.59.5.tgz", - "integrity": "sha512-NJXQC4MRnF9N9yWqQE2/KLRSOLvrrlZb48NGVfBa+RuPMN6B7ZcK5jZOvhuygv4D64fRKnZI4L4p8+M+rfeQuw==", + "version": "5.59.6", + "resolved": 
"https://registry.npmjs.org/@typescript-eslint/parser/-/parser-5.59.6.tgz", + "integrity": "sha512-7pCa6al03Pv1yf/dUg/s1pXz/yGMUBAw5EeWqNTFiSueKvRNonze3hma3lhdsOrQcaOXhbk5gKu2Fludiho9VA==", "dev": true, "dependencies": { - "@typescript-eslint/scope-manager": "5.59.5", - "@typescript-eslint/types": "5.59.5", - "@typescript-eslint/typescript-estree": "5.59.5", + "@typescript-eslint/scope-manager": "5.59.6", + "@typescript-eslint/types": "5.59.6", + "@typescript-eslint/typescript-estree": "5.59.6", "debug": "^4.3.4" }, "engines": { @@ -598,13 +598,13 @@ } }, "node_modules/@typescript-eslint/scope-manager": { - "version": "5.59.5", - "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-5.59.5.tgz", - "integrity": "sha512-jVecWwnkX6ZgutF+DovbBJirZcAxgxC0EOHYt/niMROf8p4PwxxG32Qdhj/iIQQIuOflLjNkxoXyArkcIP7C3A==", + "version": "5.59.6", + "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-5.59.6.tgz", + "integrity": "sha512-gLbY3Le9Dxcb8KdpF0+SJr6EQ+hFGYFl6tVY8VxLPFDfUZC7BHFw+Vq7bM5lE9DwWPfx4vMWWTLGXgpc0mAYyQ==", "dev": true, "dependencies": { - "@typescript-eslint/types": "5.59.5", - "@typescript-eslint/visitor-keys": "5.59.5" + "@typescript-eslint/types": "5.59.6", + "@typescript-eslint/visitor-keys": "5.59.6" }, "engines": { "node": "^12.22.0 || ^14.17.0 || >=16.0.0" @@ -615,13 +615,13 @@ } }, "node_modules/@typescript-eslint/type-utils": { - "version": "5.59.5", - "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-5.59.5.tgz", - "integrity": "sha512-4eyhS7oGym67/pSxA2mmNq7X164oqDYNnZCUayBwJZIRVvKpBCMBzFnFxjeoDeShjtO6RQBHBuwybuX3POnDqg==", + "version": "5.59.6", + "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-5.59.6.tgz", + "integrity": "sha512-A4tms2Mp5yNvLDlySF+kAThV9VTBPCvGf0Rp8nl/eoDX9Okun8byTKoj3fJ52IJitjWOk0fKPNQhXEB++eNozQ==", "dev": true, "dependencies": { - "@typescript-eslint/typescript-estree": "5.59.5", - 
"@typescript-eslint/utils": "5.59.5", + "@typescript-eslint/typescript-estree": "5.59.6", + "@typescript-eslint/utils": "5.59.6", "debug": "^4.3.4", "tsutils": "^3.21.0" }, @@ -642,9 +642,9 @@ } }, "node_modules/@typescript-eslint/types": { - "version": "5.59.5", - "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-5.59.5.tgz", - "integrity": "sha512-xkfRPHbqSH4Ggx4eHRIO/eGL8XL4Ysb4woL8c87YuAo8Md7AUjyWKa9YMwTL519SyDPrfEgKdewjkxNCVeJW7w==", + "version": "5.59.6", + "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-5.59.6.tgz", + "integrity": "sha512-tH5lBXZI7T2MOUgOWFdVNUILsI02shyQvfzG9EJkoONWugCG77NDDa1EeDGw7oJ5IvsTAAGVV8I3Tk2PNu9QfA==", "dev": true, "engines": { "node": "^12.22.0 || ^14.17.0 || >=16.0.0" @@ -655,13 +655,13 @@ } }, "node_modules/@typescript-eslint/typescript-estree": { - "version": "5.59.5", - "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-5.59.5.tgz", - "integrity": "sha512-+XXdLN2CZLZcD/mO7mQtJMvCkzRfmODbeSKuMY/yXbGkzvA9rJyDY5qDYNoiz2kP/dmyAxXquL2BvLQLJFPQIg==", + "version": "5.59.6", + "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-5.59.6.tgz", + "integrity": "sha512-vW6JP3lMAs/Tq4KjdI/RiHaaJSO7IUsbkz17it/Rl9Q+WkQ77EOuOnlbaU8kKfVIOJxMhnRiBG+olE7f3M16DA==", "dev": true, "dependencies": { - "@typescript-eslint/types": "5.59.5", - "@typescript-eslint/visitor-keys": "5.59.5", + "@typescript-eslint/types": "5.59.6", + "@typescript-eslint/visitor-keys": "5.59.6", "debug": "^4.3.4", "globby": "^11.1.0", "is-glob": "^4.0.3", @@ -682,17 +682,17 @@ } }, "node_modules/@typescript-eslint/utils": { - "version": "5.59.5", - "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-5.59.5.tgz", - "integrity": "sha512-sCEHOiw+RbyTii9c3/qN74hYDPNORb8yWCoPLmB7BIflhplJ65u2PBpdRla12e3SSTJ2erRkPjz7ngLHhUegxA==", + "version": "5.59.6", + "resolved": 
"https://registry.npmjs.org/@typescript-eslint/utils/-/utils-5.59.6.tgz", + "integrity": "sha512-vzaaD6EXbTS29cVH0JjXBdzMt6VBlv+hE31XktDRMX1j3462wZCJa7VzO2AxXEXcIl8GQqZPcOPuW/Z1tZVogg==", "dev": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@types/json-schema": "^7.0.9", "@types/semver": "^7.3.12", - "@typescript-eslint/scope-manager": "5.59.5", - "@typescript-eslint/types": "5.59.5", - "@typescript-eslint/typescript-estree": "5.59.5", + "@typescript-eslint/scope-manager": "5.59.6", + "@typescript-eslint/types": "5.59.6", + "@typescript-eslint/typescript-estree": "5.59.6", "eslint-scope": "^5.1.1", "semver": "^7.3.7" }, @@ -708,12 +708,12 @@ } }, "node_modules/@typescript-eslint/visitor-keys": { - "version": "5.59.5", - "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-5.59.5.tgz", - "integrity": "sha512-qL+Oz+dbeBRTeyJTIy0eniD3uvqU7x+y1QceBismZ41hd4aBSRh8UAw4pZP0+XzLuPZmx4raNMq/I+59W2lXKA==", + "version": "5.59.6", + "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-5.59.6.tgz", + "integrity": "sha512-zEfbFLzB9ETcEJ4HZEEsCR9HHeNku5/Qw1jSS5McYJv5BR+ftYXwFFAH5Al+xkGaZEqowMwl7uoJjQb1YSPF8Q==", "dev": true, "dependencies": { - "@typescript-eslint/types": "5.59.5", + "@typescript-eslint/types": "5.59.6", "eslint-visitor-keys": "^3.3.0" }, "engines": { diff --git a/package.json b/package.json index 80e4362..ef25c3d 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "roaring", - "version": "2.1.0", + "version": "2.1.1", "private": false, "description": "CRoaring official port for NodeJS", "keywords": [ @@ -82,13 +82,13 @@ }, "devDependencies": { "@balsamic/eslint-config": "^0.4.1", - "@octokit/rest": "^19.0.7", + "@octokit/rest": "^19.0.8", "@types/chai": "^4.3.5", "@types/chai-as-promised": "^7.1.5", "@types/mocha": "^10.0.1", - "@types/node": "^20.1.4", - "@typescript-eslint/eslint-plugin": "^5.59.5", - "@typescript-eslint/parser": "^5.59.5", + 
"@types/node": "^20.1.5", + "@typescript-eslint/eslint-plugin": "^5.59.6", + "@typescript-eslint/parser": "^5.59.6", "benchmark": "^2.1.4", "chai": "^4.3.7", "chai-as-promised": "^7.1.1", @@ -110,5 +110,5 @@ "typescript": "^5.0.4" }, "gypfile": true, - "roaring_version": "1.1.2" + "roaring_version": "1.2.0" } diff --git a/roaring-node.cpp b/roaring-node.cpp index 9c52780..02b4af0 100644 --- a/roaring-node.cpp +++ b/roaring-node.cpp @@ -161,8 +161,8 @@ void * gcaware_calloc(size_t count, size_t size) { void gcaware_free(void * memory) { if (memory != nullptr) { gcaware_removeAllocatedMemory(bare_malloc_size(memory)); + free(memory); } - free(memory); } void * gcaware_aligned_malloc(size_t alignment, size_t size) { @@ -176,8 +176,8 @@ void * gcaware_aligned_malloc(size_t alignment, size_t size) { void gcaware_aligned_free(void * memory) { if (memory != nullptr) { gcaware_removeAllocatedMemory(bare_aligned_malloc_size(memory)); + bare_aligned_free(memory); } - bare_aligned_free(memory); } void bare_aligned_free_callback(char * data, void * hint) { bare_aligned_free(data); } @@ -367,11 +367,11 @@ typedef struct roaring_statistics_s { // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand #ifndef ROARING_INCLUDE_ROARING_VERSION #define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION "1.1.2" +#define ROARING_VERSION "1.2.0" enum { ROARING_VERSION_MAJOR = 1, - ROARING_VERSION_MINOR = 1, - ROARING_VERSION_REVISION = 2 + ROARING_VERSION_MINOR = 2, + ROARING_VERSION_REVISION = 0 }; #endif // ROARING_INCLUDE_ROARING_VERSION @@ -438,6 +438,15 @@ enum { #define CROARING_REGULAR_VISUAL_STUDIO 1 #endif // __clang__ #endif // _MSC_VER +#ifndef CROARING_VISUAL_STUDIO +#define CROARING_VISUAL_STUDIO 0 +#endif +#ifndef CROARING_CLANG_VISUAL_STUDIO +#define CROARING_CLANG_VISUAL_STUDIO 0 +#endif +#ifndef CROARING_REGULAR_VISUAL_STUDIO +#define CROARING_REGULAR_VISUAL_STUDIO 0 +#endif #if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE 
< 200809L) #undef _POSIX_C_SOURCE @@ -461,11 +470,6 @@ enum { extern "C" { // portability definitions are in global scope, not a namespace #endif -#if CROARING_REGULAR_VISUAL_STUDIO && !defined(_WIN64) && !defined(CROARING_ACK_32BIT) -#pragma message( \ - "You appear to be attempting a 32-bit build under Visual Studio. We recommend a 64-bit build instead.") -#endif - #if defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ != 8 #error This code assumes 64-bit long longs (by use of the GCC intrinsics). Your system is not currently supported. #endif @@ -766,7 +770,7 @@ static inline int roaring_hamming(uint64_t x) { #if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) #define CROARING_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) - #elif defined(_WIN32) +#elif defined(_WIN32) #define CROARING_IS_BIG_ENDIAN 0 #else #if defined(__APPLE__) || defined(__FreeBSD__) // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__ @@ -795,6 +799,130 @@ static inline int roaring_hamming(uint64_t x) { #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #endif +// Defines for the possible CROARING atomic implementations +#define CROARING_ATOMIC_IMPL_NONE 1 +#define CROARING_ATOMIC_IMPL_CPP 2 +#define CROARING_ATOMIC_IMPL_C 3 +#define CROARING_ATOMIC_IMPL_C_WINDOWS 4 + +// If the use has forced a specific implementation, use that, otherwise, +// figure out the best implementation we can use. 
+#if !defined(CROARING_ATOMIC_IMPL) + #if defined(__cplusplus) && __cplusplus >= 201103L + #ifdef __has_include + #if __has_include() + #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_CPP + #endif //__has_include() + #else + // We lack __has_include to check: + #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_CPP + #endif //__has_include + #elif __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_ATOMICS__) + #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_C + #elif CROARING_REGULAR_VISUAL_STUDIO + // https://www.technetworkhub.com/c11-atomics-in-visual-studio-2022-version-17/ + #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_C_WINDOWS + #endif +#endif // !defined(CROARING_ATOMIC_IMPL) + +#if !defined(CROARING_ATOMIC_IMPL) + #pragma message ( "No atomic implementation found, copy on write bitmaps will not be threadsafe" ) + #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_NONE +#endif + +#if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C +#include +typedef _Atomic(uint32_t) croaring_refcount_t; + +static inline void croaring_refcount_inc(croaring_refcount_t *val) { + // Increasing the reference counter can always be done with + // memory_order_relaxed: New references to an object can only be formed from + // an existing reference, and passing an existing reference from one thread to + // another must already provide any required synchronization. + atomic_fetch_add_explicit(val, 1, memory_order_relaxed); +} + +static inline bool croaring_refcount_dec(croaring_refcount_t *val) { + // It is important to enforce any possible access to the object in one thread + // (through an existing reference) to happen before deleting the object in a + // different thread. This is achieved by a "release" operation after dropping + // a reference (any access to the object through this reference must obviously + // happened before), and an "acquire" operation before deleting the object. 
+ bool is_zero = atomic_fetch_sub_explicit(val, 1, memory_order_release) == 1; + if (is_zero) { + atomic_thread_fence(memory_order_acquire); + } + return is_zero; +} + +static inline uint32_t croaring_refcount_get(croaring_refcount_t *val) { + return atomic_load_explicit(val, memory_order_relaxed); +} +#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_CPP +#include +typedef std::atomic croaring_refcount_t; + +static inline void croaring_refcount_inc(croaring_refcount_t *val) { + val->fetch_add(1, std::memory_order_relaxed); +} + +static inline bool croaring_refcount_dec(croaring_refcount_t *val) { + // See above comments on the c11 atomic implementation for memory ordering + bool is_zero = val->fetch_sub(1, std::memory_order_release) == 1; + if (is_zero) { + std::atomic_thread_fence(std::memory_order_acquire); + } + return is_zero; +} + +static inline uint32_t croaring_refcount_get(croaring_refcount_t *val) { + return val->load(std::memory_order_relaxed); +} +#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C_WINDOWS +#include +#pragma intrinsic(_InterlockedIncrement) +#pragma intrinsic(_InterlockedDecrement) + +// _InterlockedIncrement and _InterlockedDecrement take a (signed) long, and +// overflow is defined to wrap, so we can pretend it is a uint32_t for our case +typedef volatile long croaring_refcount_t; + +static inline void croaring_refcount_inc(croaring_refcount_t *val) { + _InterlockedIncrement(val); +} + +static inline bool croaring_refcount_dec(croaring_refcount_t *val) { + return _InterlockedDecrement(val) == 0; +} + +static inline uint32_t croaring_refcount_get(croaring_refcount_t *val) { + // Per https://learn.microsoft.com/en-us/windows/win32/sync/interlocked-variable-access + // > Simple reads and writes to properly-aligned 32-bit variables are atomic + // > operations. In other words, you will not end up with only one portion + // > of the variable updated; all bits are updated in an atomic fashion. 
+ return *val; +} +#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_NONE +typedef uint32_t croaring_refcount_t; + +static inline void croaring_refcount_inc(croaring_refcount_t *val) { + *val += 1; +} + +static inline bool croaring_refcount_dec(croaring_refcount_t *val) { + assert(*val > 0); + *val -= 1; + return *val == 0; // test the counter, not the pointer (a valid pointer never compares equal to 0) +} + +static inline uint32_t croaring_refcount_get(croaring_refcount_t *val) { + return *val; +} +#else +#error "Unknown atomic implementation" +#endif + + // We need portability.h to be included first, // but we also always want isadetection.h to be // included (right after). @@ -2792,6 +2920,20 @@ enum class SerializationFormat { croaring = 0, portable = 1, unsafe_frozen_croaring = 2, + uint32_array = 4, +}; + +enum class FileSerializationFormat { + INVALID = -1, + croaring = 0, + portable = 1, + unsafe_frozen_croaring = 2, + uint32_array = 4, + + comma_separated_values = 10, + tab_separated_values = 11, + newline_separated_values = 12, + json_array = 20 }; enum class DeserializationFormat { @@ -2800,6 +2942,26 @@ enum class DeserializationFormat { portable = 1, unsafe_frozen_croaring = 2, unsafe_frozen_portable = 3, + uint32_array = 4, + + comma_separated_values = 10, + tab_separated_values = 11, + newline_separated_values = 12, + json_array = 20 +}; + +enum class FileDeserializationFormat { + INVALID = -1, + croaring = 0, + portable = 1, + unsafe_frozen_croaring = 2, + unsafe_frozen_portable = 3, + uint32_array = 4, + + comma_separated_values = 10, + tab_separated_values = 11, + newline_separated_values = 12, + json_array = 20 }; enum class FrozenViewFormat { @@ -2830,10 +2992,39 @@ SerializationFormat tryParseSerializationFormat(const v8::Local & val if (strcmp(*formatString, "unsafe_frozen_croaring") == 0) { return SerializationFormat::unsafe_frozen_croaring; } + if (strcmp(*formatString, "uint32_array") == 0) { + return SerializationFormat::uint32_array; + } } return SerializationFormat::INVALID; } +FileSerializationFormat
tryParseFileSerializationFormat(const v8::Local & value, v8::Isolate * isolate) { + SerializationFormat sf = tryParseSerializationFormat(value, isolate); + if (sf != SerializationFormat::INVALID) { + return static_cast(sf); + } + if (!isolate || value.IsEmpty()) { + return FileSerializationFormat::INVALID; + } + if (value->IsString()) { + v8::String::Utf8Value formatString(isolate, value); + if (strcmp(*formatString, "comma_separated_values") == 0) { + return FileSerializationFormat::comma_separated_values; + } + if (strcmp(*formatString, "tab_separated_values") == 0) { + return FileSerializationFormat::tab_separated_values; + } + if (strcmp(*formatString, "newline_separated_values") == 0) { + return FileSerializationFormat::newline_separated_values; + } + if (strcmp(*formatString, "json_array") == 0) { + return FileSerializationFormat::json_array; + } + } + return FileSerializationFormat::INVALID; +} + DeserializationFormat tryParseDeserializationFormat(const v8::Local & value, v8::Isolate * isolate) { if (!isolate || value.IsEmpty()) { return DeserializationFormat::INVALID; @@ -2859,10 +3050,29 @@ DeserializationFormat tryParseDeserializationFormat(const v8::Local & if (strcmp(*formatString, "unsafe_frozen_portable") == 0) { return DeserializationFormat::unsafe_frozen_portable; } + if (strcmp(*formatString, "uint32_array") == 0) { + return DeserializationFormat::uint32_array; + } + if (strcmp(*formatString, "comma_separated_values") == 0) { + return DeserializationFormat::comma_separated_values; + } + if (strcmp(*formatString, "tab_separated_values") == 0) { + return DeserializationFormat::tab_separated_values; + } + if (strcmp(*formatString, "newline_separated_values") == 0) { + return DeserializationFormat::newline_separated_values; + } + if (strcmp(*formatString, "json_array") == 0) { + return DeserializationFormat::json_array; + } } return DeserializationFormat::INVALID; } +FileDeserializationFormat tryParseFileDeserializationFormat(const v8::Local & value, 
v8::Isolate * isolate) { + return (FileDeserializationFormat)tryParseDeserializationFormat(value, isolate); +} + FrozenViewFormat tryParseFrozenViewFormat(const v8::Local & value, v8::Isolate * isolate) { if (!isolate || value.IsEmpty()) { return FrozenViewFormat::INVALID; @@ -2882,6 +3092,51 @@ FrozenViewFormat tryParseFrozenViewFormat(const v8::Local & value, v8 #endif // ROARING_NODE_SERIALIZATION_FORMAT_ +// #include "WorkerError.h" + +#ifndef ROARING_NODE_WORKER_ERROR_ +#define ROARING_NODE_WORKER_ERROR_ + + +struct WorkerError { + const char * msg; + const char * syscall; + int errorno; + std::string path; + + explicit WorkerError() : msg(nullptr), syscall(nullptr), errorno(0) {} + + explicit WorkerError(const char * msg) : msg(msg), syscall(nullptr), errorno(0) {} + + explicit WorkerError(int errorno, const char * syscall, const std::string & path) : + msg(nullptr), syscall(syscall), errorno(errorno ? errorno : 5), path(path) {} + + inline bool hasError() const { return (msg != nullptr && msg[0] != '\0') || errorno != 0; } + + static WorkerError from_errno(const char * syscall, const std::string & path) { + int errorno = errno; + errno = 0; + return WorkerError(errorno, syscall, path); + } + + v8::Local newV8Error(v8::Isolate * isolate) const { + v8::EscapableHandleScope handle_scope(isolate); + v8::Local output; + if (this->errorno) { + output = node::ErrnoException( + isolate, this->errorno, this->syscall, this->msg && this->msg[0] ? this->msg : nullptr, this->path.c_str()); + } else { + const char * msg = this->msg && this->msg[0] ? this->msg : "Invalid operation"; + v8::MaybeLocal message = v8::String::NewFromUtf8(isolate, msg, v8::NewStringType::kInternalized); + output = v8::Exception::Error(message.IsEmpty() ? 
v8::String::Empty(isolate) : message.ToLocalChecked()); + } + return handle_scope.Escape(output); + } +}; + +#endif + + using namespace roaring; using namespace roaring::api; @@ -2992,9 +3247,6 @@ class RoaringBitmap32 final : public ObjectWrap { _version(0), frozenCounter(0), readonlyViewOf(nullptr) { - if (this->roaring) { - roaring_bitmap_set_copy_on_write(this->roaring, true); - } ++addonData->RoaringBitmap32_instances; gcaware_addAllocatedMemory(sizeof(RoaringBitmap32)); } @@ -3804,6 +4056,328 @@ void RoaringBitmap32_xorManyStatic(const v8::FunctionCallbackInfo & i #define ROARING_NODE_SERIALIZATION_ +// #include "serialization-csv.h" + +#ifndef ROARING_NODE_SERIALIZATION_CSV_ +#define ROARING_NODE_SERIALIZATION_CSV_ + +#include + +// #include "mmap.h" + +#ifndef ROARING_NODE_MMAP_ +#define ROARING_NODE_MMAP_ + +#if defined(_WIN32) || defined(__MINGW32__) || defined(__MINGW64__) + +/* mmap() replacement for Windows + * + * Author: Mike Frysinger + * Placed into the public domain + */ + +/* References: + * CreateFileMapping: http://msdn.microsoft.com/en-us/library/aa366537(VS.85).aspx + * CloseHandle: http://msdn.microsoft.com/en-us/library/ms724211(VS.85).aspx + * MapViewOfFile: http://msdn.microsoft.com/en-us/library/aa366761(VS.85).aspx + * UnmapViewOfFile: http://msdn.microsoft.com/en-us/library/aa366882(VS.85).aspx + */ + +# include +# include +# include + +# define PROT_READ 0x1 +# define PROT_WRITE 0x2 +/* This flag is only available in WinXP+ */ +# ifdef FILE_MAP_EXECUTE +# define PROT_EXEC 0x4 +# else +# define PROT_EXEC 0x0 +# define FILE_MAP_EXECUTE 0 +# endif + +# define MAP_SHARED 0x01 +# define MAP_PRIVATE 0x02 +# define MAP_ANONYMOUS 0x20 +# define MAP_ANON MAP_ANONYMOUS +# define MAP_FAILED ((void *)-1) + +# ifdef __USE_FILE_OFFSET64 +# define DWORD_HI(x) (x >> 32) +# define DWORD_LO(x) ((x)&0xffffffff) +# else +# define DWORD_HI(x) (0) +# define DWORD_LO(x) (x) +# endif + +static void * mmap(void * start, size_t length, int prot, int flags, int 
fd, off_t offset) { + if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) return MAP_FAILED; + if (fd == -1) { + if (!(flags & MAP_ANON) || offset) return MAP_FAILED; + } else if (flags & MAP_ANON) + return MAP_FAILED; + + DWORD flProtect; + if (prot & PROT_WRITE) { + if (prot & PROT_EXEC) + flProtect = PAGE_EXECUTE_READWRITE; + else + flProtect = PAGE_READWRITE; + } else if (prot & PROT_EXEC) { + if (prot & PROT_READ) + flProtect = PAGE_EXECUTE_READ; + else if (prot & PROT_EXEC) + flProtect = PAGE_EXECUTE; + } else + flProtect = PAGE_READONLY; + + off_t end = length + offset; + HANDLE mmap_fd, h; + if (fd == -1) + mmap_fd = INVALID_HANDLE_VALUE; + else + mmap_fd = (HANDLE)_get_osfhandle(fd); + h = CreateFileMapping(mmap_fd, NULL, flProtect, DWORD_HI(end), DWORD_LO(end), NULL); + if (h == NULL) return MAP_FAILED; + + DWORD dwDesiredAccess; + if (prot & PROT_WRITE) + dwDesiredAccess = FILE_MAP_WRITE; + else + dwDesiredAccess = FILE_MAP_READ; + if (prot & PROT_EXEC) dwDesiredAccess |= FILE_MAP_EXECUTE; + if (flags & MAP_PRIVATE) dwDesiredAccess |= FILE_MAP_COPY; + void * ret = MapViewOfFile(h, dwDesiredAccess, DWORD_HI(offset), DWORD_LO(offset), length); + if (ret == NULL) { + CloseHandle(h); + ret = MAP_FAILED; + } + return ret; +} + +static void munmap(void * addr, size_t length) { + UnmapViewOfFile(addr); + /* ruh-ro, we leaked handle from CreateFileMapping() ... 
*/ +} + +# undef DWORD_HI +# undef DWORD_LO + +#else + +# include +# include + +#endif +#endif + + +struct CsvFileDescriptorSerializer final { + public: + static int iterate(const roaring::api::roaring_bitmap_t * r, int fd, FileSerializationFormat format) { + char separator; + switch (format) { + case FileSerializationFormat::newline_separated_values: separator = '\n'; break; + case FileSerializationFormat::comma_separated_values: separator = ','; break; + case FileSerializationFormat::tab_separated_values: separator = '\t'; break; + case FileSerializationFormat::json_array: separator = ','; break; + default: return EINVAL; + } + + CsvFileDescriptorSerializer writer(fd, separator); + if (format == FileSerializationFormat::json_array) { + writer.appendChar('['); + } + + if (r) { + roaring_iterate(r, roaringIteratorFn, &writer); + } + + if (format == FileSerializationFormat::newline_separated_values) { + writer.appendChar('\n'); + } else if (format == FileSerializationFormat::json_array) { + writer.appendChar(']'); + } + + if (!writer.flush()) { + int errorno = errno; + errno = 0; + return errorno ? 
errorno : EIO; + } + + return 0; + } + + private: + const constexpr static size_t BUFFER_SIZE = 131072; + + char * buf; + size_t bufPos; + int fd; + bool needsSeparator; + char separator; + + CsvFileDescriptorSerializer(int fd, char separator) : + buf((char *)gcaware_aligned_malloc(32, BUFFER_SIZE)), bufPos(0), fd(fd), needsSeparator(false), separator(separator) {} + + ~CsvFileDescriptorSerializer() { gcaware_aligned_free(this->buf); } + + bool flush() { + if (this->bufPos == 0) { + return true; + } + if (!this->buf) { + return false; + } + ssize_t written = write(this->fd, this->buf, this->bufPos); + if (written < 0) { + gcaware_aligned_free(this->buf); + this->buf = nullptr; + return false; + } + this->bufPos = 0; + return true; + } + + bool appendChar(char c) { + if (this->bufPos + 1 >= BUFFER_SIZE) { + if (!this->flush()) { + return false; + } + } + if (!this->buf) { + return false; + } + this->buf[this->bufPos++] = c; + return true; + } + + bool appendValue(uint32_t value) { + if (this->bufPos + 15 >= BUFFER_SIZE) { + if (!this->flush()) { + return false; + } + } + if (!this->buf) { + return false; + } + if (this->needsSeparator) { + this->buf[this->bufPos++] = this->separator; + } + this->needsSeparator = true; + + char * str = this->buf + this->bufPos; + int32_t i, j; + char c; + + /* uint to decimal */ + i = 0; + do { + uint32_t remainder = value % 10; + str[i++] = (char)(remainder + 48); + value = value / 10; + } while (value != 0); + + this->bufPos += i; + + /* reverse string */ + for (j = 0, i--; j < i; j++, i--) { + c = str[i]; + str[i] = str[j]; + str[j] = c; + } + + return true; + } + + static bool roaringIteratorFn(uint32_t value, void * param) { + return ((CsvFileDescriptorSerializer *)param)->appendValue(value); + } +}; + +WorkerError deserializeRoaringCsvFile( + roaring::api::roaring_bitmap_t * r, int fd, const char * input, size_t input_size, const std::string & filePath) { + const constexpr static size_t BUFFER_SIZE = 131072; + + char * buf; + 
ssize_t readBytes; + if (input == nullptr) { + buf = (char *)gcaware_aligned_malloc(32, BUFFER_SIZE); + if (!buf) { + return WorkerError("Failed to allocate memory for text deserialization"); + } + } else { + buf = (char *)input; + readBytes = (ssize_t)input_size; + if (readBytes < 0) { + return WorkerError("Input too big"); + } + if (readBytes == 0) { + return WorkerError(); + } + } + + roaring_bulk_context_t context; + memset(&context, 0, sizeof(context)); + uint64_t value = 0; + + bool hasValue = false; + bool isNegative = false; + for (;;) { + if (input == nullptr) { + readBytes = read(fd, buf, BUFFER_SIZE); + if (readBytes <= 0) { + if (readBytes < 0) { + WorkerError err = WorkerError::from_errno("read", filePath); + gcaware_aligned_free(buf); + return err; + } + break; + } + } + + for (ssize_t i = 0; i < readBytes; i++) { + char c = buf[i]; + if (c >= '0' && c <= '9') { + if (value <= 0xffffffff) { + hasValue = true; + value = value * 10 + (c - '0'); + } + } else { + if (hasValue) { + hasValue = false; + if (!isNegative && value <= 0xffffffff) { + roaring_bitmap_add_bulk(r, &context, value); + } + } + value = 0; + isNegative = c == '-'; + } + } + + if (input != nullptr) { + break; + } + } + + if (!isNegative && hasValue && value <= 0xffffffff) { + roaring_bitmap_add_bulk(r, &context, value); + } + if (input == nullptr) { + gcaware_aligned_free(buf); + } + + return WorkerError(); +} + +#endif + + +#if defined(_WIN32) || defined(__MINGW32__) || defined(__MINGW64__) +# include +#endif + #ifndef CROARING_SERIALIZATION_ARRAY_UINT32 constexpr const unsigned char CROARING_SERIALIZATION_ARRAY_UINT32 = 1; #endif @@ -3812,13 +4386,97 @@ constexpr const unsigned char CROARING_SERIALIZATION_ARRAY_UINT32 = 1; constexpr const unsigned char CROARING_SERIALIZATION_CONTAINER = 2; #endif -class RoaringBitmapSerializer final { +class RoaringBitmapSerializerBase { + private: + bool serializeArray = false; + size_t cardinality = 0; + public: RoaringBitmap32 * self = nullptr; - 
SerializationFormat format = SerializationFormat::INVALID; - v8utils::TypedArrayContent inputBuffer; + FileSerializationFormat format = FileSerializationFormat::INVALID; size_t volatile serializedSize = 0; + + WorkerError computeSerializedSize() { + size_t buffersize; + switch (this->format) { + case FileSerializationFormat::croaring: { + this->cardinality = this->self->getSize(); + auto sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t); + auto portablesize = roaring_bitmap_portable_size_in_bytes(this->self->roaring); + if (portablesize < sizeasarray || sizeasarray >= MAX_SERIALIZATION_ARRAY_SIZE_IN_BYTES - 1) { + buffersize = portablesize + 1; + } else { + this->serializeArray = true; + buffersize = (size_t)sizeasarray + 1; + } + break; + } + + case FileSerializationFormat::portable: { + buffersize = roaring_bitmap_portable_size_in_bytes(this->self->roaring); + break; + } + + case FileSerializationFormat::unsafe_frozen_croaring: { + buffersize = roaring_bitmap_frozen_size_in_bytes(this->self->roaring); + break; + } + + case FileSerializationFormat::uint32_array: { + buffersize = this->self->getSize() * sizeof(uint32_t); + break; + } + + default: return WorkerError("RoaringBitmap32 serialization format is invalid"); + } + + this->serializedSize = buffersize; + return WorkerError(); + } + + WorkerError serializeToBuffer(uint8_t * data) { + if (!data) { + return WorkerError("RoaringBitmap32 serialization allocation failed"); + } + + switch (format) { + case FileSerializationFormat::croaring: { + if (serializeArray) { + ((uint8_t *)data)[0] = CROARING_SERIALIZATION_ARRAY_UINT32; + memcpy(data + 1, &this->cardinality, sizeof(uint32_t)); + roaring_bitmap_to_uint32_array(self->roaring, (uint32_t *)(data + 1 + sizeof(uint32_t))); + } else { + ((uint8_t *)data)[0] = CROARING_SERIALIZATION_CONTAINER; + roaring_bitmap_portable_serialize(self->roaring, (char *)data + 1); + } + break; + } + + case FileSerializationFormat::portable: { + 
roaring_bitmap_portable_serialize(self->roaring, (char *)data); + break; + } + + case FileSerializationFormat::unsafe_frozen_croaring: { + roaring_bitmap_frozen_serialize(self->roaring, (char *)data); + break; + } + + case FileSerializationFormat::uint32_array: { + roaring_bitmap_to_uint32_array(self->roaring, (uint32_t *)data); + break; + } + + default: return WorkerError("RoaringBitmap32 serialization format is invalid"); + } + return WorkerError(); + } +}; + +class RoaringBitmapSerializer final : public RoaringBitmapSerializerBase { + public: + v8utils::TypedArrayContent inputBuffer; uint8_t * volatile allocatedBuffer = nullptr; void parseArguments(const v8::FunctionCallbackInfo & info) { @@ -3848,8 +4506,8 @@ class RoaringBitmapSerializer final { return v8utils::throwError(isolate, "RoaringBitmap32 serialization buffer argument was invalid"); } } - this->format = tryParseSerializationFormat(info[formatArgIndex], isolate); - if (this->format == SerializationFormat::INVALID) { + this->format = static_cast(tryParseSerializationFormat(info[formatArgIndex], isolate)); + if (this->format == FileSerializationFormat::INVALID) { return v8utils::throwError(isolate, "RoaringBitmap32 serialization format argument was invalid"); } if (bufferArgIndex >= 0) { @@ -3860,146 +4518,332 @@ class RoaringBitmapSerializer final { this->self = bitmap; } - const char * serialize() { - size_t buffersize; - size_t cardinality; - bool serializeArray = 0; + WorkerError serialize() { + WorkerError err = this->computeSerializedSize(); + if (err.hasError()) { + return err; + } + + uint8_t * data = this->inputBuffer.data; + + if (data == nullptr) { + data = (uint8_t *)bare_aligned_malloc( + this->format == FileSerializationFormat::unsafe_frozen_croaring ? 
32 : 8, this->serializedSize); + this->allocatedBuffer = data; + } else if (this->inputBuffer.length < this->serializedSize) { + return WorkerError("RoaringBitmap32 serialization buffer is too small"); + } + + return this->serializeToBuffer(data); + } + + void done(v8::Isolate * isolate, v8::Local & result) { + if (!this->self) { + return; + } + uint8_t * allocatedBuffer = this->allocatedBuffer; + + if (allocatedBuffer) { + // Create a new buffer using the allocated memory + v8::MaybeLocal nodeBufferMaybeLocal = + node::Buffer::New(isolate, (char *)allocatedBuffer, this->serializedSize, bare_aligned_free_callback, nullptr); + if (!nodeBufferMaybeLocal.ToLocal(&result)) { + return v8utils::throwError(isolate, "RoaringBitmap32 serialization failed to create a new buffer"); + } + this->allocatedBuffer = nullptr; + return; + } + + if (!v8utils::v8ValueToBufferWithLimit( + isolate, self->addonData, this->inputBuffer.bufferPersistent.Get(isolate), this->serializedSize, result)) { + return v8utils::throwError(isolate, "RoaringBitmap32 serialization failed to create the buffer view"); + } + } + + ~RoaringBitmapSerializer() { bare_aligned_free(this->allocatedBuffer); } +}; + +class RoaringBitmapFileSerializer final : public RoaringBitmapSerializerBase { + public: + std::string filePath; + + void parseArguments(const v8::FunctionCallbackInfo & info) { + v8::Isolate * isolate = info.GetIsolate(); + v8::HandleScope scope(isolate); + + RoaringBitmap32 * bitmap = ObjectWrap::TryUnwrap(info.Holder(), isolate); + if (bitmap == nullptr) { + return v8utils::throwError(isolate, "RoaringBitmap32 serialization on invalid object"); + } + if (info.Length() < 2) { + return v8utils::throwError(isolate, "RoaringBitmap32::serializeFileAsync requires 2 arguments"); + } + if (!info[0]->IsString()) { + return v8utils::throwError(isolate, "RoaringBitmap32 serialization file path argument was invalid"); + } + + this->format = tryParseFileSerializationFormat(info[1], isolate); + if (this->format 
== FileSerializationFormat::INVALID) { + return v8utils::throwError(isolate, "RoaringBitmap32 serialization format argument was invalid"); + } + + v8::String::Utf8Value filePathUtf8(isolate, info[0]); + this->filePath = std::string(*filePathUtf8, filePathUtf8.length()); + this->self = bitmap; + } + + WorkerError serialize() { switch (this->format) { - case SerializationFormat::croaring: { - cardinality = this->self->getSize(); - auto sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t); - auto portablesize = roaring_bitmap_portable_size_in_bytes(this->self->roaring); - if (portablesize < sizeasarray || sizeasarray >= MAX_SERIALIZATION_ARRAY_SIZE_IN_BYTES - 1) { - buffersize = portablesize + 1; - } else { - serializeArray = true; - buffersize = (size_t)sizeasarray + 1; + case FileSerializationFormat::comma_separated_values: + case FileSerializationFormat::tab_separated_values: + case FileSerializationFormat::newline_separated_values: + case FileSerializationFormat::json_array: { + int fd = open(this->filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, 0666); + if (fd < 0) { + return WorkerError::from_errno("open", this->filePath); + } + int errorno = CsvFileDescriptorSerializer::iterate(this->self->roaring, fd, this->format); + close(fd); + return errorno != 0 ? 
WorkerError(errorno, "write", this->filePath) : WorkerError(); + } + + default: break; + } + + WorkerError err = this->computeSerializedSize(); + if (err.hasError()) { + return err; + } + + int fd = open(this->filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, 0666); + if (fd < 0) { + return WorkerError::from_errno("open", this->filePath); + } + +#if defined(_WIN32) || defined(__MINGW32__) || defined(__MINGW64__) + int truncateErr = _chsize_s(fd, this->serializedSize); + if (truncateErr != 0) { + err = WorkerError(truncateErr, "_chsize_s", this->filePath); + close(fd); + return err; + } +#else + if (ftruncate(fd, this->serializedSize) < 0) { + err = WorkerError::from_errno("ftruncate", this->filePath); + close(fd); + return err; + } +#endif + + if (this->serializedSize != 0) { + uint8_t * data = (uint8_t *)mmap(nullptr, this->serializedSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (data == MAP_FAILED) { + // mmap failed, allocate and write to buffer instead + data = (uint8_t *)gcaware_aligned_malloc(32, this->serializedSize); + if (data) { + err = this->serializeToBuffer(data); + if (!err.hasError()) { + auto wresult = write(fd, data, this->serializedSize); + if (wresult < 0) { + err = WorkerError::from_errno("write", this->filePath); + } + } + gcaware_aligned_free(data); + close(fd); // the fallback path must release the descriptor on success as well as on failure + return err; } - break; + + err = WorkerError::from_errno("mmap", this->filePath); + close(fd); + return err; } - case SerializationFormat::portable: { - buffersize = roaring_bitmap_portable_size_in_bytes(this->self->roaring); - break; + err = this->serializeToBuffer(data); + + munmap(data, this->serializedSize); // unmap before any early return so the mapping is never leaked + + if (err.hasError()) { + close(fd); + return err; + } + } + + close(fd); + return err; + } +}; + +class RoaringBitmapDeserializerBase { + public: + FileDeserializationFormat format = FileDeserializationFormat::INVALID; + v8::Isolate * isolate = nullptr; + roaring_bitmap_t_ptr volatile roaring = nullptr; + uint8_t * volatile frozenBuffer = nullptr; +
+ ~RoaringBitmapDeserializerBase() { + if (this->frozenBuffer != nullptr) { + bare_aligned_free(this->frozenBuffer); + } + if (this->roaring) { + roaring_bitmap_free(this->roaring); + } + } + + WorkerError deserializeBuf(const char * bufaschar, size_t bufLen) { + if (this->format == FileDeserializationFormat::INVALID) { + return WorkerError("RoaringBitmap32 deserialization format argument was invalid"); + } + + if (bufLen == 0 || !bufaschar) { + // Empty bitmap for an empty buffer. + this->roaring = roaring_bitmap_create(); + if (!this->roaring) { + return WorkerError("RoaringBitmap32 deserialization failed to create an empty bitmap"); } + return WorkerError(); + } + + switch (this->format) { + case FileDeserializationFormat::portable: { + this->roaring = roaring_bitmap_portable_deserialize_safe(bufaschar, bufLen); + if (!this->roaring) { + return WorkerError("RoaringBitmap32 deserialization - portable deserialization failed"); + } + return WorkerError(); + } + + case FileDeserializationFormat::croaring: { + switch ((unsigned char)bufaschar[0]) { + case CROARING_SERIALIZATION_ARRAY_UINT32: { + uint32_t card = 0; // stays 0 for a truncated header, which the size check below then rejects + if (bufLen >= 1 + sizeof(uint32_t)) memcpy(&card, bufaschar + 1, sizeof(uint32_t)); // never read past the end of a short buffer + + if (card * sizeof(uint32_t) + sizeof(uint32_t) + 1 != bufLen) { + return WorkerError("RoaringBitmap32 deserialization corrupted data, wrong cardinality header"); + } + + const uint32_t * elems = (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t)); + this->roaring = roaring_bitmap_of_ptr(card, elems); + if (!this->roaring) { + return WorkerError("RoaringBitmap32 deserialization - uint32 array deserialization failed"); + } + return WorkerError(); + } + + case CROARING_SERIALIZATION_CONTAINER: { + this->roaring = roaring_bitmap_portable_deserialize_safe(bufaschar + 1, bufLen - 1); + if (!this->roaring) { + return WorkerError("RoaringBitmap32 deserialization - container deserialization failed"); + } + return WorkerError(); + } + } - case SerializationFormat::unsafe_frozen_croaring: { - buffersize =
roaring_bitmap_frozen_size_in_bytes(this->self->roaring); - break; + return WorkerError("RoaringBitmap32 deserialization - invalid portable header byte"); } - default: return "RoaringBitmap32 serialization format is invalid"; - } + case FileDeserializationFormat::unsafe_frozen_portable: + case FileDeserializationFormat::unsafe_frozen_croaring: { + this->frozenBuffer = (uint8_t *)bare_aligned_malloc(32, bufLen); + if (!this->frozenBuffer) { + return WorkerError("RoaringBitmap32 deserialization - failed to allocate memory for frozen bitmap"); + } + memcpy(this->frozenBuffer, bufaschar, bufLen); - this->serializedSize = buffersize; - uint8_t * data = this->inputBuffer.data; + if (format == FileDeserializationFormat::unsafe_frozen_croaring) { + this->roaring = + const_cast(roaring_bitmap_frozen_view((const char *)this->frozenBuffer, bufLen)); + return this->roaring ? WorkerError() + : WorkerError("RoaringBitmap32 deserialization - failed to create a frozen view"); + } - if (data == nullptr) { - data = - (uint8_t *)bare_aligned_malloc(this->format == SerializationFormat::unsafe_frozen_croaring ? 
32 : 8, buffersize); - if (!data) { - return "RoaringBitmap32 serialization allocation failed"; + this->roaring = + const_cast(roaring_bitmap_portable_deserialize_frozen((const char *)this->frozenBuffer)); + if (!this->roaring) { + return WorkerError("RoaringBitmap32 deserialization - failed to create a frozen view"); + } + return WorkerError(); } - this->allocatedBuffer = data; - } else if (this->inputBuffer.length < buffersize) { - return "RoaringBitmap32 serialization buffer is too small"; - } - switch (format) { - case SerializationFormat::croaring: { - if (serializeArray) { - data[0] = CROARING_SERIALIZATION_ARRAY_UINT32; - memcpy(data + 1, &cardinality, sizeof(uint32_t)); - roaring_bitmap_to_uint32_array(self->roaring, (uint32_t *)(data + 1 + sizeof(uint32_t))); - } else { - data[0] = CROARING_SERIALIZATION_CONTAINER; - roaring_bitmap_portable_serialize(self->roaring, (char *)data + 1); + case FileDeserializationFormat::uint32_array: { + if (bufLen % 4 != 0) { + return WorkerError( + "RoaringBitmap32 deserialization - uint32 array deserialization failed, input length is not a multiple of 4"); } - break; - } - case SerializationFormat::portable: { - roaring_bitmap_portable_serialize(self->roaring, (char *)data); - break; + if (bufLen == 0) { + this->roaring = roaring_bitmap_create(); + if (!this->roaring) { + return WorkerError("RoaringBitmap32 deserialization failed to create an empty bitmap"); + } + return WorkerError(); + } + + this->roaring = roaring_bitmap_of_ptr(bufLen >> 2, (const uint32_t *)bufaschar); + if (!this->roaring) { + return WorkerError("RoaringBitmap32 deserialization - uint32 array deserialization failed"); + } + return WorkerError(); } - case SerializationFormat::unsafe_frozen_croaring: { - roaring_bitmap_frozen_serialize(self->roaring, (char *)data); - break; + case FileDeserializationFormat::comma_separated_values: + case FileDeserializationFormat::tab_separated_values: + case FileDeserializationFormat::newline_separated_values: + case 
FileDeserializationFormat::json_array: { + this->roaring = roaring_bitmap_create(); + if (!this->roaring) { + return WorkerError("RoaringBitmap32 deserialization failed to create an empty bitmap"); + } + if (bufaschar != nullptr) { + return deserializeRoaringCsvFile(this->roaring, -1, bufaschar, bufLen, ""); + } + return WorkerError(); } - default: return "RoaringBitmap32 serialization format is invalid"; + default: return WorkerError("RoaringBitmap32 deserialization - unknown deserialization format"); } - return nullptr; } - void done(v8::Isolate * isolate, v8::Local & result) { - if (!this->self) { - return; - } - uint8_t * allocatedBuffer = this->allocatedBuffer; - - if (allocatedBuffer) { - // Create a new buffer using the allocated memory - v8::MaybeLocal nodeBufferMaybeLocal = - node::Buffer::New(isolate, (char *)allocatedBuffer, this->serializedSize, bare_aligned_free_callback, nullptr); - if (!nodeBufferMaybeLocal.ToLocal(&result)) { - return v8utils::throwError(isolate, "RoaringBitmap32 serialization failed to create a new buffer"); - } - this->allocatedBuffer = nullptr; - return; - } + void finalizeTargetBitmap(RoaringBitmap32 * targetBitmap) { + targetBitmap->replaceBitmapInstance(this->isolate, this->roaring); + this->roaring = nullptr; - if (!v8utils::v8ValueToBufferWithLimit( - isolate, self->addonData, this->inputBuffer.bufferPersistent.Get(isolate), this->serializedSize, result)) { - return v8utils::throwError(isolate, "RoaringBitmap32 serialization failed to create the buffer view"); + if (this->frozenBuffer) { + targetBitmap->frozenCounter = RoaringBitmap32::FROZEN_COUNTER_HARD_FROZEN; + targetBitmap->frozenStorage.data = this->frozenBuffer; + targetBitmap->frozenStorage.length = std::numeric_limits::max(); + this->frozenBuffer = nullptr; } } - - ~RoaringBitmapSerializer() { bare_aligned_free(this->allocatedBuffer); } }; -class RoaringBitmapDeserializer final { +class RoaringBitmapDeserializer final : public RoaringBitmapDeserializerBase { public: 
- DeserializationFormat format = DeserializationFormat::INVALID; - v8::Isolate * isolate = nullptr; - RoaringBitmap32 * targetBitmap = nullptr; v8utils::TypedArrayContent inputBuffer; - roaring_bitmap_t_ptr volatile roaring = nullptr; - uint8_t * volatile frozenBuffer = nullptr; - - ~RoaringBitmapDeserializer() { - if (this->frozenBuffer != nullptr) { - bare_aligned_free(this->frozenBuffer); - } - if (this->roaring) { - roaring_bitmap_free(this->roaring); - } - } - - const char * setOutput(v8::Isolate * isolate, const v8::MaybeLocal & valueMaybe, DeserializationFormat format) { + WorkerError setOutput( + v8::Isolate * isolate, const v8::MaybeLocal & valueMaybe, FileDeserializationFormat format) { this->isolate = isolate; this->format = format; if (valueMaybe.IsEmpty()) { - return nullptr; + return WorkerError(); } v8::Local v; if (!valueMaybe.ToLocal(&v) || v->IsNullOrUndefined()) { - return nullptr; + return WorkerError(); } if (!this->inputBuffer.set(isolate, v)) { - return "RoaringBitmap32 deserialization output argument was not a valid typed array"; + return WorkerError("RoaringBitmap32 deserialization output argument was not a valid typed array"); } - return nullptr; + return WorkerError(); } - const char * parseArguments(const v8::FunctionCallbackInfo & info, bool isInstanceMethod) { + WorkerError parseArguments(const v8::FunctionCallbackInfo & info, bool isInstanceMethod) { v8::Isolate * isolate = info.GetIsolate(); this->isolate = isolate; v8::HandleScope scope(isolate); @@ -4007,15 +4851,15 @@ class RoaringBitmapDeserializer final { if (isInstanceMethod) { this->targetBitmap = ObjectWrap::TryUnwrap(info.Holder(), isolate); if (this->targetBitmap == nullptr) { - return "RoaringBitmap32 deserialization on invalid object"; + return WorkerError("RoaringBitmap32 deserialization on invalid object"); } if (this->targetBitmap->isFrozen()) { - return ERROR_FROZEN; + return WorkerError(ERROR_FROZEN); } } if (info.Length() < 2) { - return "RoaringBitmap32 
deserialization expects a format and a buffer arguments"; + return WorkerError("RoaringBitmap32 deserialization expects a format and a buffer arguments"); } int bufferArgIndex = 1; @@ -4024,114 +4868,120 @@ class RoaringBitmapDeserializer final { bufferArgIndex = 0; fmt = tryParseDeserializationFormat(info[1], isolate); } - this->format = fmt; + this->format = static_cast(fmt); if ( !info[bufferArgIndex]->IsNullOrUndefined() && !this->inputBuffer.set(isolate, info[bufferArgIndex]->ToObject(isolate->GetCurrentContext()))) { - return "RoaringBitmap32 deserialization buffer argument was invalid"; + return WorkerError("RoaringBitmap32 deserialization buffer argument was invalid"); } - return nullptr; + return WorkerError(); } - const char * deserialize() { - if (this->format == DeserializationFormat::INVALID) { - return "RoaringBitmap32 deserialization format argument was invalid"; + WorkerError deserialize() { return this->deserializeBuf((const char *)this->inputBuffer.data, this->inputBuffer.length); } +}; + +class RoaringBitmapFileDeserializer final : public RoaringBitmapDeserializerBase { + public: + std::string filePath; + + WorkerError parseArguments(const v8::FunctionCallbackInfo & info) { + v8::Isolate * isolate = info.GetIsolate(); + this->isolate = isolate; + v8::HandleScope scope(isolate); + + if (info.Length() < 2) { + return WorkerError("RoaringBitmap32::deserializeFileAsync expects a file path and format"); } - auto bufLen = this->inputBuffer.length; - const char * bufaschar = (const char *)this->inputBuffer.data; + if (!info[0]->IsString()) { + return WorkerError("RoaringBitmap32::deserializeFileAsync expects a file path as the first argument"); + } - if (bufLen == 0 || !bufaschar) { - // Empty bitmap for an empty buffer. 
- this->roaring = roaring_bitmap_create(); - if (!this->roaring) { - return "RoaringBitmap32 deserialization failed to create an empty bitmap"; - } - roaring_bitmap_set_copy_on_write(this->roaring, true); - return nullptr; + v8::String::Utf8Value filePathUtf8(isolate, info[0]); + this->filePath = std::string(*filePathUtf8, filePathUtf8.length()); + + FileDeserializationFormat fmt = tryParseFileDeserializationFormat(info[1], isolate); + if (fmt == FileDeserializationFormat::INVALID) { + return WorkerError("RoaringBitmap32::deserializeFileAsync invalid format"); + } + this->format = fmt; + return WorkerError(); + } + + WorkerError deserialize() { + int fd = open(this->filePath.c_str(), O_RDONLY); + if (fd == -1) { + return WorkerError::from_errno("open", this->filePath); } switch (this->format) { - case DeserializationFormat::portable: { - this->roaring = roaring_bitmap_portable_deserialize_safe(bufaschar, bufLen); + case FileDeserializationFormat::comma_separated_values: + case FileDeserializationFormat::tab_separated_values: + case FileDeserializationFormat::newline_separated_values: + case FileDeserializationFormat::json_array: { + this->roaring = roaring_bitmap_create(); if (!this->roaring) { - return "RoaringBitmap32::deserialize - portable deserialization failed"; + return WorkerError("RoaringBitmap32 deserialization failed to create an empty bitmap"); } - roaring_bitmap_set_copy_on_write(this->roaring, true); - return nullptr; + WorkerError err = deserializeRoaringCsvFile(this->roaring, fd, nullptr, 0, this->filePath); + close(fd); + return err; } - case DeserializationFormat::croaring: { - switch ((unsigned char)bufaschar[0]) { - case CROARING_SERIALIZATION_ARRAY_UINT32: { - uint32_t card; - memcpy(&card, bufaschar + 1, sizeof(uint32_t)); - - if (card * sizeof(uint32_t) + sizeof(uint32_t) + 1 != bufLen) { - return "RoaringBitmap32 deserialization corrupted data, wrong cardinality header"; - } - - const uint32_t * elems = (const uint32_t *)(bufaschar + 1 + 
sizeof(uint32_t)); - this->roaring = roaring_bitmap_of_ptr(card, elems); - if (!this->roaring) { - return "RoaringBitmap32 deserialization - uint32 array deserialization failed"; - } - roaring_bitmap_set_copy_on_write(this->roaring, true); - return nullptr; - } + default: break; + } - case CROARING_SERIALIZATION_CONTAINER: { - this->roaring = roaring_bitmap_portable_deserialize_safe(bufaschar + 1, bufLen - 1); - if (!this->roaring) { - return "RoaringBitmap32 deserialization - container deserialization failed"; - } - roaring_bitmap_set_copy_on_write(this->roaring, true); - return nullptr; - } - } + struct stat st; + if (fstat(fd, &st) == -1) { + WorkerError err = WorkerError::from_errno("fstat", this->filePath); + close(fd); + return err; + } - return "RoaringBitmap32 deserialization - invalid portable header byte"; - } + size_t fileSize = st.st_size; - case DeserializationFormat::unsafe_frozen_portable: - case DeserializationFormat::unsafe_frozen_croaring: { - this->frozenBuffer = (uint8_t *)bare_aligned_malloc(32, bufLen); - if (!this->frozenBuffer) { - return "RoaringBitmap32 deserialization - failed to allocate memory for frozen bitmap"; - } - memcpy(this->frozenBuffer, bufaschar, bufLen); + if (fileSize == 0) { + WorkerError err = this->deserializeBuf(nullptr, 0); + close(fd); + return err; + } - if (format == DeserializationFormat::unsafe_frozen_croaring) { - this->roaring = - const_cast(roaring_bitmap_frozen_view((const char *)this->frozenBuffer, bufLen)); - return this->roaring ? 
nullptr : "RoaringBitmap32 deserialization - failed to create a frozen view"; + void * buf = mmap(nullptr, fileSize, PROT_READ, MAP_SHARED, fd, 0); + if (buf == MAP_FAILED) { + // mmap failed, try to read the file into a buffer + buf = gcaware_aligned_malloc(32, fileSize); + if (buf != nullptr) { + ssize_t bytesRead = read(fd, buf, fileSize); + if (bytesRead == -1) { + WorkerError err = WorkerError::from_errno("read", this->filePath); + close(fd); + gcaware_aligned_free(buf); + return err; } - - this->roaring = - const_cast(roaring_bitmap_portable_deserialize_frozen((const char *)this->frozenBuffer)); - if (!this->roaring) { - return "RoaringBitmap32 deserialization - failed to create a frozen view"; + if ((size_t)bytesRead != fileSize) { + WorkerError err = WorkerError("RoaringBitmap32::deserializeFileAsync read less bytes than expected"); + close(fd); + gcaware_aligned_free(buf); + return err; } - roaring_bitmap_set_copy_on_write(this->roaring, true); - return nullptr; + WorkerError err = this->deserializeBuf((const char *)buf, fileSize); + gcaware_aligned_free(buf); + close(fd); + return err; } - default: return "RoaringBitmap32::deserialize - unknown deserialization format"; + WorkerError err = WorkerError::from_errno("mmap", this->filePath); + close(fd); + return err; } - } - void finalizeTargetBitmap(RoaringBitmap32 * targetBitmap) { - targetBitmap->replaceBitmapInstance(this->isolate, this->roaring); - this->roaring = nullptr; + WorkerError err = this->deserializeBuf((const char *)buf, fileSize); - if (this->frozenBuffer) { - targetBitmap->frozenCounter = RoaringBitmap32::FROZEN_COUNTER_HARD_FROZEN; - targetBitmap->frozenStorage.data = this->frozenBuffer; - targetBitmap->frozenStorage.length = std::numeric_limits::max(); - this->frozenBuffer = nullptr; - } + munmap(buf, fileSize); + close(fd); + return err; } }; @@ -4183,15 +5033,15 @@ class AsyncWorker { inline bool hasStarted() const { return this->_started; } - inline bool hasError() const { return 
this->_error != nullptr; } + inline bool hasError() const { return this->_error.hasError(); } - inline void setError(const_char_ptr_t error) { - if (error != nullptr && this->_error == nullptr) { + inline void setError(const WorkerError & error) { + if (error.hasError() && !this->_error.hasError()) { this->_error = error; } } - inline void clearError() { this->_error = nullptr; } + inline void clearError() { this->_error = WorkerError(); } static v8::Local run(AsyncWorker * worker) { v8::EscapableHandleScope scope(worker->isolate); @@ -4210,7 +5060,7 @@ class AsyncWorker { auto promise = resolver->GetPromise(); if (promise.IsEmpty()) { - worker->setError("Failed to create Promise"); + worker->setError(WorkerError("Failed to create Promise")); } else { returnValue = promise; worker->_resolver.Reset(isolate, resolver); @@ -4271,7 +5121,7 @@ class AsyncWorker { private: uv_work_t _task{}; - volatile const_char_ptr_t _error; + WorkerError _error; bool _started; volatile bool _completed; v8::Persistent> _callback; @@ -4282,7 +5132,7 @@ class AsyncWorker { if ( uv_queue_work(node::GetCurrentEventLoop(v8::Isolate::GetCurrent()), &_task, AsyncWorker::_work, AsyncWorker::_done) != 0) { - setError("Error starting async thread"); + setError(WorkerError("Error starting async thread")); return false; } return true; @@ -4302,7 +5152,7 @@ class AsyncWorker { v8::Local result; - if (worker->_error == nullptr && error.IsEmpty()) { + if (!worker->_error.hasError() && error.IsEmpty()) { worker->done(result); } @@ -4319,13 +5169,11 @@ class AsyncWorker { } if (result.IsEmpty() && error.IsEmpty()) { - worker->setError("Async operation failed"); + worker->setError(WorkerError("Async operation failed")); } if (worker->hasError() && error.IsEmpty()) { - v8::MaybeLocal message = - v8::String::NewFromUtf8(isolate, worker->_error, v8::NewStringType::kInternalized); - error = v8::Exception::Error(message.IsEmpty() ? 
v8::String::Empty(isolate) : message.ToLocalChecked()); + error = worker->_error.newV8Error(isolate); } auto context = isolate->GetCurrentContext(); @@ -4389,7 +5237,7 @@ class AsyncWorker { thread_local_isolate = worker->isolate; if (status != 0) { - worker->setError("Error executing async thread"); + worker->setError(WorkerError("Error executing async thread")); } _complete(worker); @@ -4398,7 +5246,7 @@ class AsyncWorker { v8::Local _makeError(v8::Local error) { if (error.IsEmpty() || error->IsNull() || error->IsUndefined()) { - this->setError("Exception in async operation"); + this->setError(WorkerError("Exception in async operation")); return {}; } if (!error->IsObject()) { @@ -4461,7 +5309,7 @@ class ParallelAsyncWorker : public AsyncWorker { uv_work_t * tasks = (uv_work_t *)gcaware_malloc(tasksCount * sizeof(uv_work_t)); if (tasks == nullptr) { - this->setError("Failed to allocate memory"); + this->setError(WorkerError("Failed to allocate memory")); return false; } memset(tasks, 0, tasksCount * sizeof(uv_work_t)); @@ -4479,7 +5327,7 @@ class ParallelAsyncWorker : public AsyncWorker { &tasks[taskIndex], ParallelAsyncWorker::_parallelWork, ParallelAsyncWorker::_parallelDone) != 0) { - setError("Error starting async parallel task"); + setError(WorkerError("Error starting async parallel task")); break; } ++_pendingTasks; @@ -4551,7 +5399,7 @@ class RoaringBitmap32FactoryAsyncWorker : public AsyncWorker { protected: void done(v8::Local & result) override { if (this->bitmap == nullptr) { - return this->setError("Error deserializing roaring bitmap"); + return this->setError(WorkerError("Error deserializing roaring bitmap")); } v8::Local cons = this->maybeAddonData->RoaringBitmap32_constructor.Get(this->isolate); @@ -4559,12 +5407,12 @@ class RoaringBitmap32FactoryAsyncWorker : public AsyncWorker { v8::MaybeLocal resultMaybe = cons->NewInstance(isolate->GetCurrentContext(), 0, nullptr); if (!resultMaybe.ToLocal(&result)) { - return this->setError("Error 
instantiating roaring bitmap"); + return this->setError(WorkerError("Error instantiating roaring bitmap")); } RoaringBitmap32 * unwrapped = ObjectWrap::TryUnwrap(result, isolate); if (unwrapped == nullptr) { - return this->setError(ERROR_INVALID_OBJECT); + return this->setError(WorkerError(ERROR_INVALID_OBJECT)); } unwrapped->replaceBitmapInstance(this->isolate, this->bitmap); @@ -4602,7 +5450,7 @@ class ToUint32ArrayAsyncWorker final : public AsyncWorker { RoaringBitmap32 * self = ObjectWrap::TryUnwrap(info.Holder(), isolate); if (self == nullptr) { - return this->setError(ERROR_INVALID_OBJECT); + return this->setError(WorkerError(ERROR_INVALID_OBJECT)); } if (this->maybeAddonData == nullptr) { this->maybeAddonData = self->addonData; @@ -4654,7 +5502,7 @@ class ToUint32ArrayAsyncWorker final : public AsyncWorker { // Allocate a new buffer this->allocatedBuffer = (uint32_t *)bare_aligned_malloc(32, size * sizeof(uint32_t)); if (!this->allocatedBuffer) { - return this->setError("RoaringBitmap32::toUint32ArrayAsync - failed to allocate memory"); + return this->setError(WorkerError("RoaringBitmap32::toUint32ArrayAsync - failed to allocate memory")); } if (maxSize < size) { @@ -4678,7 +5526,7 @@ class ToUint32ArrayAsyncWorker final : public AsyncWorker { if (this->hasInput) { if (!v8utils::v8ValueToUint32ArrayWithLimit( isolate, this->inputContent.bufferPersistent.Get(isolate), this->outputSize, result)) { - return this->setError("RoaringBitmap32::toUint32ArrayAsync - failed to create a UInt32Array range"); + return this->setError(WorkerError("RoaringBitmap32::toUint32ArrayAsync - failed to create a UInt32Array range")); } return; } @@ -4693,27 +5541,27 @@ class ToUint32ArrayAsyncWorker final : public AsyncWorker { v8::Local nodeBufferObject; if (!nodeBufferMaybeLocal.ToLocal(&nodeBufferObject)) { - return this->setError("RoaringBitmap32::toUint32ArrayAsync - failed to create a new buffer"); + return this->setError(WorkerError("RoaringBitmap32::toUint32ArrayAsync - 
failed to create a new buffer")); } v8::Local nodeBuffer = nodeBufferObject.As(); if (nodeBuffer.IsEmpty()) { - return this->setError("RoaringBitmap32::toUint32ArrayAsync - failed to create a new buffer"); + return this->setError(WorkerError("RoaringBitmap32::toUint32ArrayAsync - failed to create a new buffer")); } result = v8::Uint32Array::New(nodeBuffer->Buffer(), 0, this->outputSize); if (result.IsEmpty()) { - return this->setError("RoaringBitmap32::toUint32ArrayAsync - failed to create a new buffer"); + return this->setError(WorkerError("RoaringBitmap32::toUint32ArrayAsync - failed to create a new buffer")); } return; } auto arrayBuffer = v8::ArrayBuffer::New(isolate, 0); if (arrayBuffer.IsEmpty()) { - return this->setError("RoaringBitmap32::toUint32ArrayAsync - failed to create an empty ArrayBuffer"); + return this->setError(WorkerError("RoaringBitmap32::toUint32ArrayAsync - failed to create an empty ArrayBuffer")); } result = v8::Uint32Array::New(arrayBuffer, 0, 0); if (result.IsEmpty()) { - return this->setError("RoaringBitmap32::toUint32ArrayAsync - failed to create an empty ArrayBuffer"); + return this->setError(WorkerError("RoaringBitmap32::toUint32ArrayAsync - failed to create an empty ArrayBuffer")); } } }; @@ -4759,6 +5607,51 @@ class SerializeWorker final : public AsyncWorker { void done(v8::Local & result) final { this->serializer.done(this->isolate, result); } }; +class SerializeFileWorker final : public AsyncWorker { + public: + const v8::FunctionCallbackInfo & info; + v8::Persistent> bitmapPersistent; + RoaringBitmapFileSerializer serializer; + + explicit SerializeFileWorker(const v8::FunctionCallbackInfo & info, AddonData * maybeAddonData) : + AsyncWorker(info.GetIsolate(), maybeAddonData), info(info) { + gcaware_addAllocatedMemory(sizeof(SerializeFileWorker)); + } + + virtual ~SerializeFileWorker() { gcaware_removeAllocatedMemory(sizeof(SerializeFileWorker)); } + + protected: + // Called before the thread starts, in the main thread. 
+ void before() final { + this->serializer.parseArguments(this->info); + if (this->serializer.self) { + if (this->maybeAddonData == nullptr) { + this->maybeAddonData = this->serializer.self->addonData; + } + this->bitmapPersistent.Reset(isolate, this->info.Holder()); + this->serializer.self->beginFreeze(); + } + } + + void work() final { + if (this->serializer.self) { + this->setError(this->serializer.serialize()); + } + } + + void finally() final { + if (this->serializer.self) { + this->serializer.self->endFreeze(); + } + } + + void done(v8::Local & result) final { + if (!this->bitmapPersistent.IsEmpty()) { + result = this->bitmapPersistent.Get(this->isolate); + } + } +}; + class DeserializeWorker final : public AsyncWorker { public: RoaringBitmapDeserializer deserializer; @@ -4779,7 +5672,7 @@ class DeserializeWorker final : public AsyncWorker { this->maybeAddonData = this->deserializer.targetBitmap->addonData; } if (this->maybeAddonData == nullptr && !this->hasError()) { - this->setError("RoaringBitmap32 deserialization failed to get the addon data"); + this->setError(WorkerError("RoaringBitmap32 deserialization failed to get the addon data")); } } } @@ -4792,12 +5685,52 @@ class DeserializeWorker final : public AsyncWorker { v8::Local cons = this->maybeAddonData->RoaringBitmap32_constructor.Get(isolate); if (!cons->NewInstance(isolate->GetCurrentContext(), 0, nullptr).ToLocal(&result)) { - return this->setError("RoaringBitmap32 deserialization failed to create a new instance"); + return this->setError(WorkerError("RoaringBitmap32 deserialization failed to create a new instance")); + } + + RoaringBitmap32 * self = ObjectWrap::TryUnwrap(result, isolate); + if (self == nullptr) { + return this->setError(WorkerError(ERROR_INVALID_OBJECT)); + } + + self->replaceBitmapInstance(isolate, nullptr); + + this->deserializer.finalizeTargetBitmap(self); + } +}; + +/** + * Same as DeserializeWorker but it uses memory mapped files to deserialize the bitmaps. 
+ */ +class DeserializeFileWorker final : public AsyncWorker { + public: + RoaringBitmapFileDeserializer deserializer; + const v8::FunctionCallbackInfo & info; + + explicit DeserializeFileWorker(const v8::FunctionCallbackInfo & info, AddonData * addonData) : + AsyncWorker(info.GetIsolate(), addonData), info(info) { + gcaware_addAllocatedMemory(sizeof(DeserializeFileWorker)); + } + + virtual ~DeserializeFileWorker() { gcaware_removeAllocatedMemory(sizeof(DeserializeFileWorker)); } + + protected: + void before() final { this->setError(this->deserializer.parseArguments(this->info)); } + + void work() final { this->setError(this->deserializer.deserialize()); } + + void done(v8::Local & result) { + v8::Isolate * isolate = this->isolate; + + v8::Local cons = this->maybeAddonData->RoaringBitmap32_constructor.Get(isolate); + + if (!cons->NewInstance(isolate->GetCurrentContext(), 0, nullptr).ToLocal(&result)) { + return this->setError(WorkerError("RoaringBitmap32 deserialization failed to create a new instance")); } RoaringBitmap32 * self = ObjectWrap::TryUnwrap(result, isolate); if (self == nullptr) { - return this->setError(ERROR_INVALID_OBJECT); + return this->setError(WorkerError(ERROR_INVALID_OBJECT)); } self->replaceBitmapInstance(isolate, nullptr); @@ -4824,8 +5757,8 @@ class DeserializeParallelWorker : public ParallelAsyncWorker { protected: virtual void parallelWork(uint32_t index) { RoaringBitmapDeserializer & item = items[index]; - const char * error = item.deserialize(); - if (error != nullptr) { + const WorkerError error = item.deserialize(); + if (error.hasError()) { this->setError(error); } } @@ -4839,7 +5772,7 @@ class DeserializeParallelWorker : public ParallelAsyncWorker { v8::MaybeLocal resultArrayMaybe = v8::Array::New(isolate, itemsCount); v8::Local resultArray; if (!resultArrayMaybe.ToLocal(&resultArray)) { - return this->setError("RoaringBitmap32 deserialization failed to create a new array"); + return this->setError(WorkerError("RoaringBitmap32 
deserialization failed to create a new array")); } v8::Local currentContext = isolate->GetCurrentContext(); @@ -4848,12 +5781,12 @@ class DeserializeParallelWorker : public ParallelAsyncWorker { v8::MaybeLocal instanceMaybe = cons->NewInstance(currentContext, 0, nullptr); v8::Local instance; if (!instanceMaybe.ToLocal(&instance)) { - return this->setError("RoaringBitmap32 deserialization failed to create a new instance"); + return this->setError(WorkerError("RoaringBitmap32 deserialization failed to create a new instance")); } RoaringBitmap32 * unwrapped = ObjectWrap::TryUnwrap(instance, isolate); if (unwrapped == nullptr) { - return this->setError(ERROR_INVALID_OBJECT); + return this->setError(WorkerError(ERROR_INVALID_OBJECT)); } RoaringBitmapDeserializer & item = items[i]; @@ -4881,10 +5814,9 @@ class FromArrayAsyncWorker : public RoaringBitmap32FactoryAsyncWorker { void work() final { bitmap = roaring_bitmap_create_with_capacity(buffer.length); if (bitmap == nullptr) { - this->setError("Failed to allocate roaring bitmap"); + this->setError(WorkerError("Failed to allocate roaring bitmap")); return; } - roaring_bitmap_set_copy_on_write(bitmap, true); roaring_bitmap_add_many(bitmap, buffer.length, buffer.data); roaring_bitmap_run_optimize(bitmap); roaring_bitmap_shrink_to_fit(bitmap); @@ -4900,9 +5832,10 @@ void RoaringBitmap32_serialize(const v8::FunctionCallbackInfo & info) RoaringBitmapSerializer serializer; serializer.parseArguments(info); if (serializer.self) { - const char * error = serializer.serialize(); - if (error) { - return v8utils::throwError(isolate, error); + WorkerError error = serializer.serialize(); + if (error.hasError()) { + isolate->ThrowException(error.newV8Error(isolate)); + return; } v8::Local result; serializer.done(isolate, result); @@ -4917,6 +5850,11 @@ void RoaringBitmap32_serializeAsync(const v8::FunctionCallbackInfo & info.GetReturnValue().Set(AsyncWorker::run(worker)); } +void RoaringBitmap32_serializeFileAsync(const 
v8::FunctionCallbackInfo & info) { + SerializeFileWorker * worker = new SerializeFileWorker(info, nullptr); + info.GetReturnValue().Set(AsyncWorker::run(worker)); +} + void RoaringBitmap32_unsafeFrozenViewStatic(const v8::FunctionCallbackInfo & info) { v8::Isolate * isolate = info.GetIsolate(); @@ -4992,7 +5930,6 @@ void RoaringBitmap32_unsafeFrozenViewStatic(const v8::FunctionCallbackInforeplaceBitmapInstance(isolate, bitmap); @@ -5022,12 +5959,13 @@ void RoaringBitmap32_deserializeStatic(const v8::FunctionCallbackInfo self->replaceBitmapInstance(isolate, nullptr); RoaringBitmapDeserializer deserializer; - const char * error = deserializer.parseArguments(info, false); - if (!error) { + WorkerError error = deserializer.parseArguments(info, false); + if (!error.hasError()) { error = deserializer.deserialize(); } - if (error) { - return v8utils::throwError(isolate, error); + if (error.hasError()) { + isolate->ThrowException(error.newV8Error(isolate)); + return; } deserializer.finalizeTargetBitmap(self); @@ -5039,12 +5977,13 @@ void RoaringBitmap32_deserialize(const v8::FunctionCallbackInfo & inf v8::Isolate * isolate = info.GetIsolate(); RoaringBitmapDeserializer deserializer; - const char * error = deserializer.parseArguments(info, true); - if (!error) { + WorkerError error = deserializer.parseArguments(info, true); + if (!error.hasError()) { error = deserializer.deserialize(); } - if (error) { - return v8utils::throwError(isolate, error); + if (error.hasError()) { + isolate->ThrowException(error.newV8Error(isolate)); + return; } deserializer.targetBitmap->replaceBitmapInstance(isolate, nullptr); @@ -5054,9 +5993,8 @@ void RoaringBitmap32_deserialize(const v8::FunctionCallbackInfo & inf info.GetReturnValue().Set(info.Holder()); } -void RoaringBitmap32_deserializeStaticAsync(const v8::FunctionCallbackInfo & info) { +void RoaringBitmap32_deserializeAsyncStatic(const v8::FunctionCallbackInfo & info) { v8::Isolate * isolate = v8::Isolate::GetCurrent(); - AddonData * 
addonData = AddonData::get(info); if (addonData == nullptr) { return v8utils::throwError(isolate, ERROR_INVALID_OBJECT); @@ -5075,6 +6013,26 @@ void RoaringBitmap32_deserializeStaticAsync(const v8::FunctionCallbackInfo & info) { + v8::Isolate * isolate = v8::Isolate::GetCurrent(); + AddonData * addonData = AddonData::get(info); + if (addonData == nullptr) { + return v8utils::throwError(isolate, ERROR_INVALID_OBJECT); + } + + auto * worker = new DeserializeFileWorker(info, addonData); + if (worker == nullptr) { + return v8utils::throwError(isolate, "RoaringBitmap32 deserialization failed to allocate async worker"); + } + + if (info.Length() >= 3 && info[2]->IsFunction()) { + worker->setCallback(info[2]); + } + + v8::Local returnValue = AsyncWorker::run(worker); + info.GetReturnValue().Set(returnValue); +} + void RoaringBitmap32_deserializeParallelStaticAsync(const v8::FunctionCallbackInfo & info) { v8::Isolate * isolate = v8::Isolate::GetCurrent(); @@ -5093,12 +6051,12 @@ void RoaringBitmap32_deserializeParallelStaticAsync(const v8::FunctionCallbackIn } if (info.Length() < 2) { - worker->setError("RoaringBitmap32::deserializeAsync - requires at least two arguments"); + worker->setError(WorkerError("RoaringBitmap32::deserializeAsync - requires at least two arguments")); return info.GetReturnValue().Set(AsyncWorker::run(worker)); } if (!info[0]->IsArray()) { - worker->setError("RoaringBitmap32::deserializeParallelAsync requires an array as first argument"); + worker->setError(WorkerError("RoaringBitmap32::deserializeParallelAsync requires an array as first argument")); return info.GetReturnValue().Set(AsyncWorker::run(worker)); } @@ -5106,19 +6064,20 @@ void RoaringBitmap32_deserializeParallelStaticAsync(const v8::FunctionCallbackIn uint32_t length = array->Length(); if (length > 0x01FFFFFF) { - worker->setError("RoaringBitmap32::deserializeParallelAsync - array too big"); + worker->setError(WorkerError("RoaringBitmap32::deserializeParallelAsync - array too big")); 
return info.GetReturnValue().Set(AsyncWorker::run(worker)); } DeserializationFormat format = tryParseDeserializationFormat(info[1], isolate); if (format == DeserializationFormat::INVALID) { - worker->setError("RoaringBitmap32::deserializeAsync - second argument must be a valid deserialization format"); + worker->setError( + WorkerError("RoaringBitmap32::deserializeAsync - second argument must be a valid deserialization format")); return info.GetReturnValue().Set(AsyncWorker::run(worker)); } RoaringBitmapDeserializer * items = length ? new RoaringBitmapDeserializer[length]() : nullptr; if (items == nullptr && length != 0) { - worker->setError("RoaringBitmap32::deserializeParallelAsync - failed to allocate array of deserializers"); + worker->setError(WorkerError("RoaringBitmap32::deserializeParallelAsync - failed to allocate array of deserializers")); return info.GetReturnValue().Set(AsyncWorker::run(worker)); } @@ -5127,8 +6086,8 @@ void RoaringBitmap32_deserializeParallelStaticAsync(const v8::FunctionCallbackIn auto context = isolate->GetCurrentContext(); for (uint32_t i = 0; i != length; ++i) { - const char * err = items[i].setOutput(isolate, array->Get(context, i), format); - if (err != nullptr) { + WorkerError err = items[i].setOutput(isolate, array->Get(context, i), (FileDeserializationFormat)format); + if (err.hasError()) { worker->setError(err); return info.GetReturnValue().Set(AsyncWorker::run(worker)); } @@ -5714,7 +6673,6 @@ void RoaringBitmap32_fromRangeStatic(const v8::FunctionCallbackInfo & if (getRangeOperationParameters(info, minInteger, maxInteger)) { roaring_bitmap_t * r = roaring_bitmap_from_range(minInteger, maxInteger, step); if (r != nullptr) { - roaring_bitmap_set_copy_on_write(r, true); self->replaceBitmapInstance(isolate, r); } } @@ -6574,6 +7532,7 @@ void RoaringBitmap32_Init(v8::Local exports, AddonData * addonData) NODE_SET_PROTOTYPE_METHOD(ctor, "select", RoaringBitmap32_select); NODE_SET_PROTOTYPE_METHOD(ctor, "serialize", 
RoaringBitmap32_serialize); NODE_SET_PROTOTYPE_METHOD(ctor, "serializeAsync", RoaringBitmap32_serializeAsync); + NODE_SET_PROTOTYPE_METHOD(ctor, "serializeFileAsync", RoaringBitmap32_serializeFileAsync); NODE_SET_PROTOTYPE_METHOD(ctor, "shift", RoaringBitmap32_shift); NODE_SET_PROTOTYPE_METHOD(ctor, "shrinkToFit", RoaringBitmap32_shrinkToFit); NODE_SET_PROTOTYPE_METHOD(ctor, "statistics", RoaringBitmap32_statistics); @@ -6599,7 +7558,8 @@ void RoaringBitmap32_Init(v8::Local exports, AddonData * addonData) v8utils::defineHiddenField(isolate, ctorObject, "default", ctorFunction); AddonData_setMethod(ctorObject, "deserialize", RoaringBitmap32_deserializeStatic, addonData); - AddonData_setMethod(ctorObject, "deserializeAsync", RoaringBitmap32_deserializeStaticAsync, addonData); + AddonData_setMethod(ctorObject, "deserializeAsync", RoaringBitmap32_deserializeAsyncStatic, addonData); + AddonData_setMethod(ctorObject, "deserializeFileAsync", RoaringBitmap32_deserializeFileAsyncStatic, addonData); AddonData_setMethod(ctorObject, "deserializeParallelAsync", RoaringBitmap32_deserializeParallelStaticAsync, addonData); ignoreMaybeResult( @@ -7140,33 +8100,32 @@ static inline uint32_t dynamic_croaring_detect_supported_architectures() { #if defined(__x86_64__) || defined(_M_AMD64) // x64 -#if defined(__cplusplus) +#if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_CPP static inline uint32_t croaring_detect_supported_architectures() { // thread-safe as per the C++11 standard. static uint32_t buffer = dynamic_croaring_detect_supported_architectures(); return buffer; } -#elif CROARING_VISUAL_STUDIO -// Visual Studio does not support C11 atomics. 
-static inline uint32_t croaring_detect_supported_architectures() { - static int buffer = CROARING_UNINITIALIZED; +#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C +static uint32_t croaring_detect_supported_architectures() { + // we use an atomic for thread safety + static _Atomic uint32_t buffer = CROARING_UNINITIALIZED; if (buffer == CROARING_UNINITIALIZED) { + // atomicity is sufficient buffer = dynamic_croaring_detect_supported_architectures(); } return buffer; } -#else // CROARING_VISUAL_STUDIO -#include -uint32_t croaring_detect_supported_architectures() { - // we use an atomic for thread safety - static _Atomic uint32_t buffer = CROARING_UNINITIALIZED; +#else +// If we do not have atomics, we do the best we can. +static inline uint32_t croaring_detect_supported_architectures() { + static uint32_t buffer = CROARING_UNINITIALIZED; if (buffer == CROARING_UNINITIALIZED) { - // atomicity is sufficient buffer = dynamic_croaring_detect_supported_architectures(); } return buffer; } -#endif // CROARING_REGULAR_VISUAL_STUDIO +#endif // CROARING_C_ATOMIC #ifdef ROARING_DISABLE_AVX @@ -16557,11 +17516,10 @@ extern "C" { namespace roaring { namespace internal { * A shared container is a wrapper around a container * with reference counting. 
*/ - STRUCT_CONTAINER(shared_container_s) { container_t *container; uint8_t typecode; - uint32_t counter; // to be managed atomically + croaring_refcount_t counter; // to be managed atomically }; typedef struct shared_container_s shared_container_t; @@ -19141,7 +20099,7 @@ container_t *get_copy_of_container( shared_container_t *shared_container; if (*typecode == SHARED_CONTAINER_TYPE) { shared_container = CAST_shared(c); - shared_container->counter += 1; + croaring_refcount_inc(&shared_container->counter); return shared_container; } assert(*typecode != SHARED_CONTAINER_TYPE); @@ -19153,7 +20111,10 @@ container_t *get_copy_of_container( shared_container->container = c; shared_container->typecode = *typecode; - + // At this point, we are creating new shared container + // so there should be no other references, and setting + // the counter to 2 - even non-atomically - is safe as + // long as the value is set before the return statement. shared_container->counter = 2; *typecode = SHARED_CONTAINER_TYPE; @@ -19192,12 +20153,10 @@ container_t *container_clone(const container_t *c, uint8_t typecode) { container_t *shared_container_extract_copy( shared_container_t *sc, uint8_t *typecode ){ - assert(sc->counter > 0); assert(sc->typecode != SHARED_CONTAINER_TYPE); - sc->counter--; *typecode = sc->typecode; container_t *answer; - if (sc->counter == 0) { + if (croaring_refcount_dec(&sc->counter)) { answer = sc->container; sc->container = NULL; // paranoid roaring_free(sc); @@ -19209,9 +20168,7 @@ container_t *shared_container_extract_copy( } void shared_container_free(shared_container_t *container) { - assert(container->counter > 0); - container->counter--; - if (container->counter == 0) { + if (croaring_refcount_dec(&container->counter)) { assert(container->typecode != SHARED_CONTAINER_TYPE); container_free(container->container, container->typecode); container->container = NULL; // paranoid @@ -23586,9 +24543,9 @@ void roaring_bitmap_printf_describe(const roaring_bitmap_t *r) 
{ get_full_container_name(ra->containers[i], ra->typecodes[i]), container_get_cardinality(ra->containers[i], ra->typecodes[i])); if (ra->typecodes[i] == SHARED_CONTAINER_TYPE) { - printf( - "(shared count = %" PRIu32 " )", - CAST_shared(ra->containers[i])->counter); + printf("(shared count = %" PRIu32 " )", + croaring_refcount_get( + &(CAST_shared(ra->containers[i])->counter))); } if (i + 1 < ra->size) { diff --git a/src/cpp/RoaringBitmap32-main.h b/src/cpp/RoaringBitmap32-main.h index 0f2622f..7a976f0 100644 --- a/src/cpp/RoaringBitmap32-main.h +++ b/src/cpp/RoaringBitmap32-main.h @@ -857,6 +857,7 @@ void RoaringBitmap32_Init(v8::Local exports, AddonData * addonData) NODE_SET_PROTOTYPE_METHOD(ctor, "select", RoaringBitmap32_select); NODE_SET_PROTOTYPE_METHOD(ctor, "serialize", RoaringBitmap32_serialize); NODE_SET_PROTOTYPE_METHOD(ctor, "serializeAsync", RoaringBitmap32_serializeAsync); + NODE_SET_PROTOTYPE_METHOD(ctor, "serializeFileAsync", RoaringBitmap32_serializeFileAsync); NODE_SET_PROTOTYPE_METHOD(ctor, "shift", RoaringBitmap32_shift); NODE_SET_PROTOTYPE_METHOD(ctor, "shrinkToFit", RoaringBitmap32_shrinkToFit); NODE_SET_PROTOTYPE_METHOD(ctor, "statistics", RoaringBitmap32_statistics); @@ -882,7 +883,8 @@ void RoaringBitmap32_Init(v8::Local exports, AddonData * addonData) v8utils::defineHiddenField(isolate, ctorObject, "default", ctorFunction); AddonData_setMethod(ctorObject, "deserialize", RoaringBitmap32_deserializeStatic, addonData); - AddonData_setMethod(ctorObject, "deserializeAsync", RoaringBitmap32_deserializeStaticAsync, addonData); + AddonData_setMethod(ctorObject, "deserializeAsync", RoaringBitmap32_deserializeAsyncStatic, addonData); + AddonData_setMethod(ctorObject, "deserializeFileAsync", RoaringBitmap32_deserializeFileAsyncStatic, addonData); AddonData_setMethod(ctorObject, "deserializeParallelAsync", RoaringBitmap32_deserializeParallelStaticAsync, addonData); ignoreMaybeResult( diff --git a/src/cpp/RoaringBitmap32-ranges.h 
b/src/cpp/RoaringBitmap32-ranges.h index 32fc2e8..62d6553 100644 --- a/src/cpp/RoaringBitmap32-ranges.h +++ b/src/cpp/RoaringBitmap32-ranges.h @@ -531,7 +531,6 @@ void RoaringBitmap32_fromRangeStatic(const v8::FunctionCallbackInfo & if (getRangeOperationParameters(info, minInteger, maxInteger)) { roaring_bitmap_t * r = roaring_bitmap_from_range(minInteger, maxInteger, step); if (r != nullptr) { - roaring_bitmap_set_copy_on_write(r, true); self->replaceBitmapInstance(isolate, r); } } diff --git a/src/cpp/RoaringBitmap32-serialization.h b/src/cpp/RoaringBitmap32-serialization.h index 977fbe6..18203e0 100644 --- a/src/cpp/RoaringBitmap32-serialization.h +++ b/src/cpp/RoaringBitmap32-serialization.h @@ -11,9 +11,10 @@ void RoaringBitmap32_serialize(const v8::FunctionCallbackInfo & info) RoaringBitmapSerializer serializer; serializer.parseArguments(info); if (serializer.self) { - const char * error = serializer.serialize(); - if (error) { - return v8utils::throwError(isolate, error); + WorkerError error = serializer.serialize(); + if (error.hasError()) { + isolate->ThrowException(error.newV8Error(isolate)); + return; } v8::Local result; serializer.done(isolate, result); @@ -28,6 +29,11 @@ void RoaringBitmap32_serializeAsync(const v8::FunctionCallbackInfo & info.GetReturnValue().Set(AsyncWorker::run(worker)); } +void RoaringBitmap32_serializeFileAsync(const v8::FunctionCallbackInfo & info) { + SerializeFileWorker * worker = new SerializeFileWorker(info, nullptr); + info.GetReturnValue().Set(AsyncWorker::run(worker)); +} + void RoaringBitmap32_unsafeFrozenViewStatic(const v8::FunctionCallbackInfo & info) { v8::Isolate * isolate = info.GetIsolate(); @@ -103,7 +109,6 @@ void RoaringBitmap32_unsafeFrozenViewStatic(const v8::FunctionCallbackInforeplaceBitmapInstance(isolate, bitmap); @@ -133,12 +138,13 @@ void RoaringBitmap32_deserializeStatic(const v8::FunctionCallbackInfo self->replaceBitmapInstance(isolate, nullptr); RoaringBitmapDeserializer deserializer; - const char * 
error = deserializer.parseArguments(info, false); - if (!error) { + WorkerError error = deserializer.parseArguments(info, false); + if (!error.hasError()) { error = deserializer.deserialize(); } - if (error) { - return v8utils::throwError(isolate, error); + if (error.hasError()) { + isolate->ThrowException(error.newV8Error(isolate)); + return; } deserializer.finalizeTargetBitmap(self); @@ -150,12 +156,13 @@ void RoaringBitmap32_deserialize(const v8::FunctionCallbackInfo & inf v8::Isolate * isolate = info.GetIsolate(); RoaringBitmapDeserializer deserializer; - const char * error = deserializer.parseArguments(info, true); - if (!error) { + WorkerError error = deserializer.parseArguments(info, true); + if (!error.hasError()) { error = deserializer.deserialize(); } - if (error) { - return v8utils::throwError(isolate, error); + if (error.hasError()) { + isolate->ThrowException(error.newV8Error(isolate)); + return; } deserializer.targetBitmap->replaceBitmapInstance(isolate, nullptr); @@ -165,9 +172,8 @@ void RoaringBitmap32_deserialize(const v8::FunctionCallbackInfo & inf info.GetReturnValue().Set(info.Holder()); } -void RoaringBitmap32_deserializeStaticAsync(const v8::FunctionCallbackInfo & info) { +void RoaringBitmap32_deserializeAsyncStatic(const v8::FunctionCallbackInfo & info) { v8::Isolate * isolate = v8::Isolate::GetCurrent(); - AddonData * addonData = AddonData::get(info); if (addonData == nullptr) { return v8utils::throwError(isolate, ERROR_INVALID_OBJECT); @@ -186,6 +192,26 @@ void RoaringBitmap32_deserializeStaticAsync(const v8::FunctionCallbackInfo & info) { + v8::Isolate * isolate = v8::Isolate::GetCurrent(); + AddonData * addonData = AddonData::get(info); + if (addonData == nullptr) { + return v8utils::throwError(isolate, ERROR_INVALID_OBJECT); + } + + auto * worker = new DeserializeFileWorker(info, addonData); + if (worker == nullptr) { + return v8utils::throwError(isolate, "RoaringBitmap32 deserialization failed to allocate async worker"); + } + + if 
(info.Length() >= 3 && info[2]->IsFunction()) { + worker->setCallback(info[2]); + } + + v8::Local returnValue = AsyncWorker::run(worker); + info.GetReturnValue().Set(returnValue); +} + void RoaringBitmap32_deserializeParallelStaticAsync(const v8::FunctionCallbackInfo & info) { v8::Isolate * isolate = v8::Isolate::GetCurrent(); @@ -204,12 +230,12 @@ void RoaringBitmap32_deserializeParallelStaticAsync(const v8::FunctionCallbackIn } if (info.Length() < 2) { - worker->setError("RoaringBitmap32::deserializeAsync - requires at least two arguments"); + worker->setError(WorkerError("RoaringBitmap32::deserializeAsync - requires at least two arguments")); return info.GetReturnValue().Set(AsyncWorker::run(worker)); } if (!info[0]->IsArray()) { - worker->setError("RoaringBitmap32::deserializeParallelAsync requires an array as first argument"); + worker->setError(WorkerError("RoaringBitmap32::deserializeParallelAsync requires an array as first argument")); return info.GetReturnValue().Set(AsyncWorker::run(worker)); } @@ -217,19 +243,20 @@ void RoaringBitmap32_deserializeParallelStaticAsync(const v8::FunctionCallbackIn uint32_t length = array->Length(); if (length > 0x01FFFFFF) { - worker->setError("RoaringBitmap32::deserializeParallelAsync - array too big"); + worker->setError(WorkerError("RoaringBitmap32::deserializeParallelAsync - array too big")); return info.GetReturnValue().Set(AsyncWorker::run(worker)); } DeserializationFormat format = tryParseDeserializationFormat(info[1], isolate); if (format == DeserializationFormat::INVALID) { - worker->setError("RoaringBitmap32::deserializeAsync - second argument must be a valid deserialization format"); + worker->setError( + WorkerError("RoaringBitmap32::deserializeAsync - second argument must be a valid deserialization format")); return info.GetReturnValue().Set(AsyncWorker::run(worker)); } RoaringBitmapDeserializer * items = length ? 
new RoaringBitmapDeserializer[length]() : nullptr; if (items == nullptr && length != 0) { - worker->setError("RoaringBitmap32::deserializeParallelAsync - failed to allocate array of deserializers"); + worker->setError(WorkerError("RoaringBitmap32::deserializeParallelAsync - failed to allocate array of deserializers")); return info.GetReturnValue().Set(AsyncWorker::run(worker)); } @@ -238,8 +265,8 @@ void RoaringBitmap32_deserializeParallelStaticAsync(const v8::FunctionCallbackIn auto context = isolate->GetCurrentContext(); for (uint32_t i = 0; i != length; ++i) { - const char * err = items[i].setOutput(isolate, array->Get(context, i), format); - if (err != nullptr) { + WorkerError err = items[i].setOutput(isolate, array->Get(context, i), (FileDeserializationFormat)format); + if (err.hasError()) { worker->setError(err); return info.GetReturnValue().Set(AsyncWorker::run(worker)); } diff --git a/src/cpp/RoaringBitmap32.h b/src/cpp/RoaringBitmap32.h index f884ece..d17a534 100644 --- a/src/cpp/RoaringBitmap32.h +++ b/src/cpp/RoaringBitmap32.h @@ -3,6 +3,7 @@ #include "v8utils.h" #include "serialization-format.h" +#include "WorkerError.h" using namespace roaring; using namespace roaring::api; @@ -114,9 +115,6 @@ class RoaringBitmap32 final : public ObjectWrap { _version(0), frozenCounter(0), readonlyViewOf(nullptr) { - if (this->roaring) { - roaring_bitmap_set_copy_on_write(this->roaring, true); - } ++addonData->RoaringBitmap32_instances; gcaware_addAllocatedMemory(sizeof(RoaringBitmap32)); } diff --git a/src/cpp/WorkerError.h b/src/cpp/WorkerError.h new file mode 100644 index 0000000..7139515 --- /dev/null +++ b/src/cpp/WorkerError.h @@ -0,0 +1,43 @@ +#ifndef ROARING_NODE_WORKER_ERROR_ +#define ROARING_NODE_WORKER_ERROR_ + +#include "includes.h" +#include "v8utils.h" + +struct WorkerError { + const char * msg; + const char * syscall; + int errorno; + std::string path; + + explicit WorkerError() : msg(nullptr), syscall(nullptr), errorno(0) {} + + explicit 
WorkerError(const char * msg) : msg(msg), syscall(nullptr), errorno(0) {} + + explicit WorkerError(int errorno, const char * syscall, const std::string & path) : + msg(nullptr), syscall(syscall), errorno(errorno ? errorno : 5), path(path) {} + + inline bool hasError() const { return (msg != nullptr && msg[0] != '\0') || errorno != 0; } + + static WorkerError from_errno(const char * syscall, const std::string & path) { + int errorno = errno; + errno = 0; + return WorkerError(errorno, syscall, path); + } + + v8::Local newV8Error(v8::Isolate * isolate) const { + v8::EscapableHandleScope handle_scope(isolate); + v8::Local output; + if (this->errorno) { + output = node::ErrnoException( + isolate, this->errorno, this->syscall, this->msg && this->msg[0] ? this->msg : nullptr, this->path.c_str()); + } else { + const char * msg = this->msg && this->msg[0] ? this->msg : "Invalid operation"; + v8::MaybeLocal message = v8::String::NewFromUtf8(isolate, msg, v8::NewStringType::kInternalized); + output = v8::Exception::Error(message.IsEmpty() ? 
v8::String::Empty(isolate) : message.ToLocalChecked()); + } + return handle_scope.Escape(output); + } +}; + +#endif diff --git a/src/cpp/async-workers.h b/src/cpp/async-workers.h index 599af18..a9a2b0a 100644 --- a/src/cpp/async-workers.h +++ b/src/cpp/async-workers.h @@ -3,6 +3,7 @@ #include "RoaringBitmap32.h" #include "RoaringBitmap32-serialization.h" +#include "WorkerError.h" uint32_t getCpusCount() { static uint32_t _cpusCountCache = 0; @@ -43,15 +44,15 @@ class AsyncWorker { inline bool hasStarted() const { return this->_started; } - inline bool hasError() const { return this->_error != nullptr; } + inline bool hasError() const { return this->_error.hasError(); } - inline void setError(const_char_ptr_t error) { - if (error != nullptr && this->_error == nullptr) { + inline void setError(const WorkerError & error) { + if (error.hasError() && !this->_error.hasError()) { this->_error = error; } } - inline void clearError() { this->_error = nullptr; } + inline void clearError() { this->_error = WorkerError(); } static v8::Local run(AsyncWorker * worker) { v8::EscapableHandleScope scope(worker->isolate); @@ -70,7 +71,7 @@ class AsyncWorker { auto promise = resolver->GetPromise(); if (promise.IsEmpty()) { - worker->setError("Failed to create Promise"); + worker->setError(WorkerError("Failed to create Promise")); } else { returnValue = promise; worker->_resolver.Reset(isolate, resolver); @@ -131,7 +132,7 @@ class AsyncWorker { private: uv_work_t _task{}; - volatile const_char_ptr_t _error; + WorkerError _error; bool _started; volatile bool _completed; v8::Persistent> _callback; @@ -142,7 +143,7 @@ class AsyncWorker { if ( uv_queue_work(node::GetCurrentEventLoop(v8::Isolate::GetCurrent()), &_task, AsyncWorker::_work, AsyncWorker::_done) != 0) { - setError("Error starting async thread"); + setError(WorkerError("Error starting async thread")); return false; } return true; @@ -162,7 +163,7 @@ class AsyncWorker { v8::Local result; - if (worker->_error == nullptr && 
error.IsEmpty()) { + if (!worker->_error.hasError() && error.IsEmpty()) { worker->done(result); } @@ -179,13 +180,11 @@ class AsyncWorker { } if (result.IsEmpty() && error.IsEmpty()) { - worker->setError("Async operation failed"); + worker->setError(WorkerError("Async operation failed")); } if (worker->hasError() && error.IsEmpty()) { - v8::MaybeLocal message = - v8::String::NewFromUtf8(isolate, worker->_error, v8::NewStringType::kInternalized); - error = v8::Exception::Error(message.IsEmpty() ? v8::String::Empty(isolate) : message.ToLocalChecked()); + error = worker->_error.newV8Error(isolate); } auto context = isolate->GetCurrentContext(); @@ -249,7 +248,7 @@ class AsyncWorker { thread_local_isolate = worker->isolate; if (status != 0) { - worker->setError("Error executing async thread"); + worker->setError(WorkerError("Error executing async thread")); } _complete(worker); @@ -258,7 +257,7 @@ class AsyncWorker { v8::Local _makeError(v8::Local error) { if (error.IsEmpty() || error->IsNull() || error->IsUndefined()) { - this->setError("Exception in async operation"); + this->setError(WorkerError("Exception in async operation")); return {}; } if (!error->IsObject()) { @@ -321,7 +320,7 @@ class ParallelAsyncWorker : public AsyncWorker { uv_work_t * tasks = (uv_work_t *)gcaware_malloc(tasksCount * sizeof(uv_work_t)); if (tasks == nullptr) { - this->setError("Failed to allocate memory"); + this->setError(WorkerError("Failed to allocate memory")); return false; } memset(tasks, 0, tasksCount * sizeof(uv_work_t)); @@ -339,7 +338,7 @@ class ParallelAsyncWorker : public AsyncWorker { &tasks[taskIndex], ParallelAsyncWorker::_parallelWork, ParallelAsyncWorker::_parallelDone) != 0) { - setError("Error starting async parallel task"); + setError(WorkerError("Error starting async parallel task")); break; } ++_pendingTasks; @@ -411,7 +410,7 @@ class RoaringBitmap32FactoryAsyncWorker : public AsyncWorker { protected: void done(v8::Local & result) override { if (this->bitmap == 
nullptr) { - return this->setError("Error deserializing roaring bitmap"); + return this->setError(WorkerError("Error deserializing roaring bitmap")); } v8::Local cons = this->maybeAddonData->RoaringBitmap32_constructor.Get(this->isolate); @@ -419,12 +418,12 @@ class RoaringBitmap32FactoryAsyncWorker : public AsyncWorker { v8::MaybeLocal resultMaybe = cons->NewInstance(isolate->GetCurrentContext(), 0, nullptr); if (!resultMaybe.ToLocal(&result)) { - return this->setError("Error instantiating roaring bitmap"); + return this->setError(WorkerError("Error instantiating roaring bitmap")); } RoaringBitmap32 * unwrapped = ObjectWrap::TryUnwrap(result, isolate); if (unwrapped == nullptr) { - return this->setError(ERROR_INVALID_OBJECT); + return this->setError(WorkerError(ERROR_INVALID_OBJECT)); } unwrapped->replaceBitmapInstance(this->isolate, this->bitmap); @@ -462,7 +461,7 @@ class ToUint32ArrayAsyncWorker final : public AsyncWorker { RoaringBitmap32 * self = ObjectWrap::TryUnwrap(info.Holder(), isolate); if (self == nullptr) { - return this->setError(ERROR_INVALID_OBJECT); + return this->setError(WorkerError(ERROR_INVALID_OBJECT)); } if (this->maybeAddonData == nullptr) { this->maybeAddonData = self->addonData; @@ -514,7 +513,7 @@ class ToUint32ArrayAsyncWorker final : public AsyncWorker { // Allocate a new buffer this->allocatedBuffer = (uint32_t *)bare_aligned_malloc(32, size * sizeof(uint32_t)); if (!this->allocatedBuffer) { - return this->setError("RoaringBitmap32::toUint32ArrayAsync - failed to allocate memory"); + return this->setError(WorkerError("RoaringBitmap32::toUint32ArrayAsync - failed to allocate memory")); } if (maxSize < size) { @@ -538,7 +537,7 @@ class ToUint32ArrayAsyncWorker final : public AsyncWorker { if (this->hasInput) { if (!v8utils::v8ValueToUint32ArrayWithLimit( isolate, this->inputContent.bufferPersistent.Get(isolate), this->outputSize, result)) { - return this->setError("RoaringBitmap32::toUint32ArrayAsync - failed to create a UInt32Array 
range"); + return this->setError(WorkerError("RoaringBitmap32::toUint32ArrayAsync - failed to create a UInt32Array range")); } return; } @@ -553,27 +552,27 @@ class ToUint32ArrayAsyncWorker final : public AsyncWorker { v8::Local nodeBufferObject; if (!nodeBufferMaybeLocal.ToLocal(&nodeBufferObject)) { - return this->setError("RoaringBitmap32::toUint32ArrayAsync - failed to create a new buffer"); + return this->setError(WorkerError("RoaringBitmap32::toUint32ArrayAsync - failed to create a new buffer")); } v8::Local nodeBuffer = nodeBufferObject.As(); if (nodeBuffer.IsEmpty()) { - return this->setError("RoaringBitmap32::toUint32ArrayAsync - failed to create a new buffer"); + return this->setError(WorkerError("RoaringBitmap32::toUint32ArrayAsync - failed to create a new buffer")); } result = v8::Uint32Array::New(nodeBuffer->Buffer(), 0, this->outputSize); if (result.IsEmpty()) { - return this->setError("RoaringBitmap32::toUint32ArrayAsync - failed to create a new buffer"); + return this->setError(WorkerError("RoaringBitmap32::toUint32ArrayAsync - failed to create a new buffer")); } return; } auto arrayBuffer = v8::ArrayBuffer::New(isolate, 0); if (arrayBuffer.IsEmpty()) { - return this->setError("RoaringBitmap32::toUint32ArrayAsync - failed to create an empty ArrayBuffer"); + return this->setError(WorkerError("RoaringBitmap32::toUint32ArrayAsync - failed to create an empty ArrayBuffer")); } result = v8::Uint32Array::New(arrayBuffer, 0, 0); if (result.IsEmpty()) { - return this->setError("RoaringBitmap32::toUint32ArrayAsync - failed to create an empty ArrayBuffer"); + return this->setError(WorkerError("RoaringBitmap32::toUint32ArrayAsync - failed to create an empty ArrayBuffer")); } } }; @@ -619,6 +618,51 @@ class SerializeWorker final : public AsyncWorker { void done(v8::Local & result) final { this->serializer.done(this->isolate, result); } }; +class SerializeFileWorker final : public AsyncWorker { + public: + const v8::FunctionCallbackInfo & info; + v8::Persistent> 
bitmapPersistent; + RoaringBitmapFileSerializer serializer; + + explicit SerializeFileWorker(const v8::FunctionCallbackInfo & info, AddonData * maybeAddonData) : + AsyncWorker(info.GetIsolate(), maybeAddonData), info(info) { + gcaware_addAllocatedMemory(sizeof(SerializeFileWorker)); + } + + virtual ~SerializeFileWorker() { gcaware_removeAllocatedMemory(sizeof(SerializeFileWorker)); } + + protected: + // Called before the thread starts, in the main thread. + void before() final { + this->serializer.parseArguments(this->info); + if (this->serializer.self) { + if (this->maybeAddonData == nullptr) { + this->maybeAddonData = this->serializer.self->addonData; + } + this->bitmapPersistent.Reset(isolate, this->info.Holder()); + this->serializer.self->beginFreeze(); + } + } + + void work() final { + if (this->serializer.self) { + this->setError(this->serializer.serialize()); + } + } + + void finally() final { + if (this->serializer.self) { + this->serializer.self->endFreeze(); + } + } + + void done(v8::Local & result) final { + if (!this->bitmapPersistent.IsEmpty()) { + result = this->bitmapPersistent.Get(this->isolate); + } + } +}; + class DeserializeWorker final : public AsyncWorker { public: RoaringBitmapDeserializer deserializer; @@ -639,7 +683,7 @@ class DeserializeWorker final : public AsyncWorker { this->maybeAddonData = this->deserializer.targetBitmap->addonData; } if (this->maybeAddonData == nullptr && !this->hasError()) { - this->setError("RoaringBitmap32 deserialization failed to get the addon data"); + this->setError(WorkerError("RoaringBitmap32 deserialization failed to get the addon data")); } } } @@ -652,12 +696,52 @@ class DeserializeWorker final : public AsyncWorker { v8::Local cons = this->maybeAddonData->RoaringBitmap32_constructor.Get(isolate); if (!cons->NewInstance(isolate->GetCurrentContext(), 0, nullptr).ToLocal(&result)) { - return this->setError("RoaringBitmap32 deserialization failed to create a new instance"); + return 
this->setError(WorkerError("RoaringBitmap32 deserialization failed to create a new instance")); + } + + RoaringBitmap32 * self = ObjectWrap::TryUnwrap(result, isolate); + if (self == nullptr) { + return this->setError(WorkerError(ERROR_INVALID_OBJECT)); + } + + self->replaceBitmapInstance(isolate, nullptr); + + this->deserializer.finalizeTargetBitmap(self); + } +}; + +/** + * Same as DeserializeWorker but it uses memory mapped files to deserialize the bitmaps. + */ +class DeserializeFileWorker final : public AsyncWorker { + public: + RoaringBitmapFileDeserializer deserializer; + const v8::FunctionCallbackInfo & info; + + explicit DeserializeFileWorker(const v8::FunctionCallbackInfo & info, AddonData * addonData) : + AsyncWorker(info.GetIsolate(), addonData), info(info) { + gcaware_addAllocatedMemory(sizeof(DeserializeFileWorker)); + } + + virtual ~DeserializeFileWorker() { gcaware_removeAllocatedMemory(sizeof(DeserializeFileWorker)); } + + protected: + void before() final { this->setError(this->deserializer.parseArguments(this->info)); } + + void work() final { this->setError(this->deserializer.deserialize()); } + + void done(v8::Local & result) { + v8::Isolate * isolate = this->isolate; + + v8::Local cons = this->maybeAddonData->RoaringBitmap32_constructor.Get(isolate); + + if (!cons->NewInstance(isolate->GetCurrentContext(), 0, nullptr).ToLocal(&result)) { + return this->setError(WorkerError("RoaringBitmap32 deserialization failed to create a new instance")); } RoaringBitmap32 * self = ObjectWrap::TryUnwrap(result, isolate); if (self == nullptr) { - return this->setError(ERROR_INVALID_OBJECT); + return this->setError(WorkerError(ERROR_INVALID_OBJECT)); } self->replaceBitmapInstance(isolate, nullptr); @@ -684,8 +768,8 @@ class DeserializeParallelWorker : public ParallelAsyncWorker { protected: virtual void parallelWork(uint32_t index) { RoaringBitmapDeserializer & item = items[index]; - const char * error = item.deserialize(); - if (error != nullptr) { + const 
WorkerError error = item.deserialize(); + if (error.hasError()) { this->setError(error); } } @@ -699,7 +783,7 @@ class DeserializeParallelWorker : public ParallelAsyncWorker { v8::MaybeLocal resultArrayMaybe = v8::Array::New(isolate, itemsCount); v8::Local resultArray; if (!resultArrayMaybe.ToLocal(&resultArray)) { - return this->setError("RoaringBitmap32 deserialization failed to create a new array"); + return this->setError(WorkerError("RoaringBitmap32 deserialization failed to create a new array")); } v8::Local currentContext = isolate->GetCurrentContext(); @@ -708,12 +792,12 @@ class DeserializeParallelWorker : public ParallelAsyncWorker { v8::MaybeLocal instanceMaybe = cons->NewInstance(currentContext, 0, nullptr); v8::Local instance; if (!instanceMaybe.ToLocal(&instance)) { - return this->setError("RoaringBitmap32 deserialization failed to create a new instance"); + return this->setError(WorkerError("RoaringBitmap32 deserialization failed to create a new instance")); } RoaringBitmap32 * unwrapped = ObjectWrap::TryUnwrap(instance, isolate); if (unwrapped == nullptr) { - return this->setError(ERROR_INVALID_OBJECT); + return this->setError(WorkerError(ERROR_INVALID_OBJECT)); } RoaringBitmapDeserializer & item = items[i]; @@ -741,10 +825,9 @@ class FromArrayAsyncWorker : public RoaringBitmap32FactoryAsyncWorker { void work() final { bitmap = roaring_bitmap_create_with_capacity(buffer.length); if (bitmap == nullptr) { - this->setError("Failed to allocate roaring bitmap"); + this->setError(WorkerError("Failed to allocate roaring bitmap")); return; } - roaring_bitmap_set_copy_on_write(bitmap, true); roaring_bitmap_add_many(bitmap, buffer.length, buffer.data); roaring_bitmap_run_optimize(bitmap); roaring_bitmap_shrink_to_fit(bitmap); diff --git a/src/cpp/memory.h b/src/cpp/memory.h index b6ea72a..6787aad 100644 --- a/src/cpp/memory.h +++ b/src/cpp/memory.h @@ -105,8 +105,8 @@ void * gcaware_calloc(size_t count, size_t size) { void gcaware_free(void * memory) { if 
(memory != nullptr) { gcaware_removeAllocatedMemory(bare_malloc_size(memory)); + free(memory); } - free(memory); } void * gcaware_aligned_malloc(size_t alignment, size_t size) { @@ -120,8 +120,8 @@ void * gcaware_aligned_malloc(size_t alignment, size_t size) { void gcaware_aligned_free(void * memory) { if (memory != nullptr) { gcaware_removeAllocatedMemory(bare_aligned_malloc_size(memory)); + bare_aligned_free(memory); } - bare_aligned_free(memory); } void bare_aligned_free_callback(char * data, void * hint) { bare_aligned_free(data); } diff --git a/src/cpp/mmap.h b/src/cpp/mmap.h new file mode 100644 index 0000000..5018155 --- /dev/null +++ b/src/cpp/mmap.h @@ -0,0 +1,106 @@ +#ifndef ROARING_NODE_MMAP_ +#define ROARING_NODE_MMAP_ + +#if defined(_WIN32) || defined(__MINGW32__) || defined(__MINGW64__) + +/* mmap() replacement for Windows + * + * Author: Mike Frysinger + * Placed into the public domain + */ + +/* References: + * CreateFileMapping: http://msdn.microsoft.com/en-us/library/aa366537(VS.85).aspx + * CloseHandle: http://msdn.microsoft.com/en-us/library/ms724211(VS.85).aspx + * MapViewOfFile: http://msdn.microsoft.com/en-us/library/aa366761(VS.85).aspx + * UnmapViewOfFile: http://msdn.microsoft.com/en-us/library/aa366882(VS.85).aspx + */ + +# include +# include +# include + +# define PROT_READ 0x1 +# define PROT_WRITE 0x2 +/* This flag is only available in WinXP+ */ +# ifdef FILE_MAP_EXECUTE +# define PROT_EXEC 0x4 +# else +# define PROT_EXEC 0x0 +# define FILE_MAP_EXECUTE 0 +# endif + +# define MAP_SHARED 0x01 +# define MAP_PRIVATE 0x02 +# define MAP_ANONYMOUS 0x20 +# define MAP_ANON MAP_ANONYMOUS +# define MAP_FAILED ((void *)-1) + +# ifdef __USE_FILE_OFFSET64 +# define DWORD_HI(x) (x >> 32) +# define DWORD_LO(x) ((x)&0xffffffff) +# else +# define DWORD_HI(x) (0) +# define DWORD_LO(x) (x) +# endif + +static void * mmap(void * start, size_t length, int prot, int flags, int fd, off_t offset) { + if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) return 
MAP_FAILED; + if (fd == -1) { + if (!(flags & MAP_ANON) || offset) return MAP_FAILED; + } else if (flags & MAP_ANON) + return MAP_FAILED; + + DWORD flProtect; + if (prot & PROT_WRITE) { + if (prot & PROT_EXEC) + flProtect = PAGE_EXECUTE_READWRITE; + else + flProtect = PAGE_READWRITE; + } else if (prot & PROT_EXEC) { + if (prot & PROT_READ) + flProtect = PAGE_EXECUTE_READ; + else if (prot & PROT_EXEC) + flProtect = PAGE_EXECUTE; + } else + flProtect = PAGE_READONLY; + + off_t end = length + offset; + HANDLE mmap_fd, h; + if (fd == -1) + mmap_fd = INVALID_HANDLE_VALUE; + else + mmap_fd = (HANDLE)_get_osfhandle(fd); + h = CreateFileMapping(mmap_fd, NULL, flProtect, DWORD_HI(end), DWORD_LO(end), NULL); + if (h == NULL) return MAP_FAILED; + + DWORD dwDesiredAccess; + if (prot & PROT_WRITE) + dwDesiredAccess = FILE_MAP_WRITE; + else + dwDesiredAccess = FILE_MAP_READ; + if (prot & PROT_EXEC) dwDesiredAccess |= FILE_MAP_EXECUTE; + if (flags & MAP_PRIVATE) dwDesiredAccess |= FILE_MAP_COPY; + void * ret = MapViewOfFile(h, dwDesiredAccess, DWORD_HI(offset), DWORD_LO(offset), length); + if (ret == NULL) { + CloseHandle(h); + ret = MAP_FAILED; + } + return ret; +} + +static void munmap(void * addr, size_t length) { + UnmapViewOfFile(addr); + /* ruh-ro, we leaked handle from CreateFileMapping() ... 
*/ +} + +# undef DWORD_HI +# undef DWORD_LO + +#else + +# include +# include + +#endif +#endif diff --git a/src/cpp/serialization-csv.h b/src/cpp/serialization-csv.h new file mode 100644 index 0000000..186d81c --- /dev/null +++ b/src/cpp/serialization-csv.h @@ -0,0 +1,208 @@ +#ifndef ROARING_NODE_SERIALIZATION_CSV_ +#define ROARING_NODE_SERIALIZATION_CSV_ + +#include "includes.h" +#include +#include "mmap.h" +#include "serialization-format.h" +#include "WorkerError.h" + +struct CsvFileDescriptorSerializer final { + public: + static int iterate(const roaring::api::roaring_bitmap_t * r, int fd, FileSerializationFormat format) { + char separator; + switch (format) { + case FileSerializationFormat::newline_separated_values: separator = '\n'; break; + case FileSerializationFormat::comma_separated_values: separator = ','; break; + case FileSerializationFormat::tab_separated_values: separator = '\t'; break; + case FileSerializationFormat::json_array: separator = ','; break; + default: return EINVAL; + } + + CsvFileDescriptorSerializer writer(fd, separator); + if (format == FileSerializationFormat::json_array) { + writer.appendChar('['); + } + + if (r) { + roaring_iterate(r, roaringIteratorFn, &writer); + } + + if (format == FileSerializationFormat::newline_separated_values) { + writer.appendChar('\n'); + } else if (format == FileSerializationFormat::json_array) { + writer.appendChar(']'); + } + + if (!writer.flush()) { + int errorno = errno; + errno = 0; + return errorno ? 
errorno : EIO; + } + + return 0; + } + + private: + const constexpr static size_t BUFFER_SIZE = 131072; + + char * buf; + size_t bufPos; + int fd; + bool needsSeparator; + char separator; + + CsvFileDescriptorSerializer(int fd, char separator) : + buf((char *)gcaware_aligned_malloc(32, BUFFER_SIZE)), bufPos(0), fd(fd), needsSeparator(false), separator(separator) {} + + ~CsvFileDescriptorSerializer() { gcaware_aligned_free(this->buf); } + + bool flush() { + if (this->bufPos == 0) { + return true; + } + if (!this->buf) { + return false; + } + ssize_t written = write(this->fd, this->buf, this->bufPos); + if (written < 0) { + gcaware_aligned_free(this->buf); + this->buf = nullptr; + return false; + } + this->bufPos = 0; + return true; + } + + bool appendChar(char c) { + if (this->bufPos + 1 >= BUFFER_SIZE) { + if (!this->flush()) { + return false; + } + } + if (!this->buf) { + return false; + } + this->buf[this->bufPos++] = c; + return true; + } + + bool appendValue(uint32_t value) { + if (this->bufPos + 15 >= BUFFER_SIZE) { + if (!this->flush()) { + return false; + } + } + if (!this->buf) { + return false; + } + if (this->needsSeparator) { + this->buf[this->bufPos++] = this->separator; + } + this->needsSeparator = true; + + char * str = this->buf + this->bufPos; + int32_t i, j; + char c; + + /* uint to decimal */ + i = 0; + do { + uint32_t remainder = value % 10; + str[i++] = (char)(remainder + 48); + value = value / 10; + } while (value != 0); + + this->bufPos += i; + + /* reverse string */ + for (j = 0, i--; j < i; j++, i--) { + c = str[i]; + str[i] = str[j]; + str[j] = c; + } + + return true; + } + + static bool roaringIteratorFn(uint32_t value, void * param) { + return ((CsvFileDescriptorSerializer *)param)->appendValue(value); + } +}; + +WorkerError deserializeRoaringCsvFile( + roaring::api::roaring_bitmap_t * r, int fd, const char * input, size_t input_size, const std::string & filePath) { + const constexpr static size_t BUFFER_SIZE = 131072; + + char * buf; + 
ssize_t readBytes; + if (input == nullptr) { + buf = (char *)gcaware_aligned_malloc(32, BUFFER_SIZE); + if (!buf) { + return WorkerError("Failed to allocate memory for text deserialization"); + } + } else { + buf = (char *)input; + readBytes = (ssize_t)input_size; + if (readBytes < 0) { + return WorkerError("Input too big"); + } + if (readBytes == 0) { + return WorkerError(); + } + } + + roaring_bulk_context_t context; + memset(&context, 0, sizeof(context)); + uint64_t value = 0; + + bool hasValue = false; + bool isNegative = false; + for (;;) { + if (input == nullptr) { + readBytes = read(fd, buf, BUFFER_SIZE); + if (readBytes <= 0) { + if (readBytes < 0) { + WorkerError err = WorkerError::from_errno("read", filePath); + gcaware_aligned_free(buf); + return err; + } + break; + } + } + + for (ssize_t i = 0; i < readBytes; i++) { + char c = buf[i]; + if (c >= '0' && c <= '9') { + if (value <= 0xffffffff) { + hasValue = true; + value = value * 10 + (c - '0'); + } + } else { + if (hasValue) { + hasValue = false; + if (!isNegative && value <= 0xffffffff) { + roaring_bitmap_add_bulk(r, &context, value); + } + } + value = 0; + isNegative = c == '-'; + } + } + + if (input != nullptr) { + break; + } + } + + if (!isNegative && hasValue && value <= 0xffffffff) { + roaring_bitmap_add_bulk(r, &context, value); + } + if (input == nullptr) { + gcaware_aligned_free(buf); + } + + return WorkerError(); +} + +#endif diff --git a/src/cpp/serialization-format.h b/src/cpp/serialization-format.h index 164bdbe..8c80d25 100644 --- a/src/cpp/serialization-format.h +++ b/src/cpp/serialization-format.h @@ -10,6 +10,20 @@ enum class SerializationFormat { croaring = 0, portable = 1, unsafe_frozen_croaring = 2, + uint32_array = 4, +}; + +enum class FileSerializationFormat { + INVALID = -1, + croaring = 0, + portable = 1, + unsafe_frozen_croaring = 2, + uint32_array = 4, + + comma_separated_values = 10, + tab_separated_values = 11, + newline_separated_values = 12, + json_array = 20 }; enum class 
DeserializationFormat { @@ -18,6 +32,26 @@ enum class DeserializationFormat { portable = 1, unsafe_frozen_croaring = 2, unsafe_frozen_portable = 3, + uint32_array = 4, + + comma_separated_values = 10, + tab_separated_values = 11, + newline_separated_values = 12, + json_array = 20 +}; + +enum class FileDeserializationFormat { + INVALID = -1, + croaring = 0, + portable = 1, + unsafe_frozen_croaring = 2, + unsafe_frozen_portable = 3, + uint32_array = 4, + + comma_separated_values = 10, + tab_separated_values = 11, + newline_separated_values = 12, + json_array = 20 }; enum class FrozenViewFormat { @@ -48,10 +82,39 @@ SerializationFormat tryParseSerializationFormat(const v8::Local & val if (strcmp(*formatString, "unsafe_frozen_croaring") == 0) { return SerializationFormat::unsafe_frozen_croaring; } + if (strcmp(*formatString, "uint32_array") == 0) { + return SerializationFormat::uint32_array; + } } return SerializationFormat::INVALID; } +FileSerializationFormat tryParseFileSerializationFormat(const v8::Local & value, v8::Isolate * isolate) { + SerializationFormat sf = tryParseSerializationFormat(value, isolate); + if (sf != SerializationFormat::INVALID) { + return static_cast(sf); + } + if (!isolate || value.IsEmpty()) { + return FileSerializationFormat::INVALID; + } + if (value->IsString()) { + v8::String::Utf8Value formatString(isolate, value); + if (strcmp(*formatString, "comma_separated_values") == 0) { + return FileSerializationFormat::comma_separated_values; + } + if (strcmp(*formatString, "tab_separated_values") == 0) { + return FileSerializationFormat::tab_separated_values; + } + if (strcmp(*formatString, "newline_separated_values") == 0) { + return FileSerializationFormat::newline_separated_values; + } + if (strcmp(*formatString, "json_array") == 0) { + return FileSerializationFormat::json_array; + } + } + return FileSerializationFormat::INVALID; +} + DeserializationFormat tryParseDeserializationFormat(const v8::Local & value, v8::Isolate * isolate) { if 
(!isolate || value.IsEmpty()) { return DeserializationFormat::INVALID; @@ -77,10 +140,29 @@ DeserializationFormat tryParseDeserializationFormat(const v8::Local & if (strcmp(*formatString, "unsafe_frozen_portable") == 0) { return DeserializationFormat::unsafe_frozen_portable; } + if (strcmp(*formatString, "uint32_array") == 0) { + return DeserializationFormat::uint32_array; + } + if (strcmp(*formatString, "comma_separated_values") == 0) { + return DeserializationFormat::comma_separated_values; + } + if (strcmp(*formatString, "tab_separated_values") == 0) { + return DeserializationFormat::tab_separated_values; + } + if (strcmp(*formatString, "newline_separated_values") == 0) { + return DeserializationFormat::newline_separated_values; + } + if (strcmp(*formatString, "json_array") == 0) { + return DeserializationFormat::json_array; + } } return DeserializationFormat::INVALID; } +FileDeserializationFormat tryParseFileDeserializationFormat(const v8::Local & value, v8::Isolate * isolate) { + return (FileDeserializationFormat)tryParseDeserializationFormat(value, isolate); +} + FrozenViewFormat tryParseFrozenViewFormat(const v8::Local & value, v8::Isolate * isolate) { if (!isolate || value.IsEmpty()) { return FrozenViewFormat::INVALID; diff --git a/src/cpp/serialization.h b/src/cpp/serialization.h index 28caa0d..810b2bf 100644 --- a/src/cpp/serialization.h +++ b/src/cpp/serialization.h @@ -2,6 +2,12 @@ #define ROARING_NODE_SERIALIZATION_ #include "RoaringBitmap32.h" +#include "serialization-csv.h" +#include "mmap.h" + +#if defined(_WIN32) || defined(__MINGW32__) || defined(__MINGW64__) +# include +#endif #ifndef CROARING_SERIALIZATION_ARRAY_UINT32 constexpr const unsigned char CROARING_SERIALIZATION_ARRAY_UINT32 = 1; @@ -11,13 +17,97 @@ constexpr const unsigned char CROARING_SERIALIZATION_ARRAY_UINT32 = 1; constexpr const unsigned char CROARING_SERIALIZATION_CONTAINER = 2; #endif -class RoaringBitmapSerializer final { +class RoaringBitmapSerializerBase { + private: + bool 
serializeArray = false; + size_t cardinality = 0; + public: RoaringBitmap32 * self = nullptr; - SerializationFormat format = SerializationFormat::INVALID; - v8utils::TypedArrayContent inputBuffer; + FileSerializationFormat format = FileSerializationFormat::INVALID; size_t volatile serializedSize = 0; + + WorkerError computeSerializedSize() { + size_t buffersize; + switch (this->format) { + case FileSerializationFormat::croaring: { + this->cardinality = this->self->getSize(); + auto sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t); + auto portablesize = roaring_bitmap_portable_size_in_bytes(this->self->roaring); + if (portablesize < sizeasarray || sizeasarray >= MAX_SERIALIZATION_ARRAY_SIZE_IN_BYTES - 1) { + buffersize = portablesize + 1; + } else { + this->serializeArray = true; + buffersize = (size_t)sizeasarray + 1; + } + break; + } + + case FileSerializationFormat::portable: { + buffersize = roaring_bitmap_portable_size_in_bytes(this->self->roaring); + break; + } + + case FileSerializationFormat::unsafe_frozen_croaring: { + buffersize = roaring_bitmap_frozen_size_in_bytes(this->self->roaring); + break; + } + + case FileSerializationFormat::uint32_array: { + buffersize = this->self->getSize() * sizeof(uint32_t); + break; + } + + default: return WorkerError("RoaringBitmap32 serialization format is invalid"); + } + + this->serializedSize = buffersize; + return WorkerError(); + } + + WorkerError serializeToBuffer(uint8_t * data) { + if (!data) { + return WorkerError("RoaringBitmap32 serialization allocation failed"); + } + + switch (format) { + case FileSerializationFormat::croaring: { + if (serializeArray) { + ((uint8_t *)data)[0] = CROARING_SERIALIZATION_ARRAY_UINT32; + memcpy(data + 1, &this->cardinality, sizeof(uint32_t)); + roaring_bitmap_to_uint32_array(self->roaring, (uint32_t *)(data + 1 + sizeof(uint32_t))); + } else { + ((uint8_t *)data)[0] = CROARING_SERIALIZATION_CONTAINER; + roaring_bitmap_portable_serialize(self->roaring, (char *)data + 
1); + } + break; + } + + case FileSerializationFormat::portable: { + roaring_bitmap_portable_serialize(self->roaring, (char *)data); + break; + } + + case FileSerializationFormat::unsafe_frozen_croaring: { + roaring_bitmap_frozen_serialize(self->roaring, (char *)data); + break; + } + + case FileSerializationFormat::uint32_array: { + roaring_bitmap_to_uint32_array(self->roaring, (uint32_t *)data); + break; + } + + default: return WorkerError("RoaringBitmap32 serialization format is invalid"); + } + return WorkerError(); + } +}; + +class RoaringBitmapSerializer final : public RoaringBitmapSerializerBase { + public: + v8utils::TypedArrayContent inputBuffer; uint8_t * volatile allocatedBuffer = nullptr; void parseArguments(const v8::FunctionCallbackInfo & info) { @@ -47,8 +137,8 @@ class RoaringBitmapSerializer final { return v8utils::throwError(isolate, "RoaringBitmap32 serialization buffer argument was invalid"); } } - this->format = tryParseSerializationFormat(info[formatArgIndex], isolate); - if (this->format == SerializationFormat::INVALID) { + this->format = static_cast(tryParseSerializationFormat(info[formatArgIndex], isolate)); + if (this->format == FileSerializationFormat::INVALID) { return v8utils::throwError(isolate, "RoaringBitmap32 serialization format argument was invalid"); } if (bufferArgIndex >= 0) { @@ -59,77 +149,23 @@ class RoaringBitmapSerializer final { this->self = bitmap; } - const char * serialize() { - size_t buffersize; - size_t cardinality; - bool serializeArray = 0; - switch (this->format) { - case SerializationFormat::croaring: { - cardinality = this->self->getSize(); - auto sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t); - auto portablesize = roaring_bitmap_portable_size_in_bytes(this->self->roaring); - if (portablesize < sizeasarray || sizeasarray >= MAX_SERIALIZATION_ARRAY_SIZE_IN_BYTES - 1) { - buffersize = portablesize + 1; - } else { - serializeArray = true; - buffersize = (size_t)sizeasarray + 1; - } - break; - } - 
- case SerializationFormat::portable: { - buffersize = roaring_bitmap_portable_size_in_bytes(this->self->roaring); - break; - } - - case SerializationFormat::unsafe_frozen_croaring: { - buffersize = roaring_bitmap_frozen_size_in_bytes(this->self->roaring); - break; - } - - default: return "RoaringBitmap32 serialization format is invalid"; + WorkerError serialize() { + WorkerError err = this->computeSerializedSize(); + if (err.hasError()) { + return err; } - this->serializedSize = buffersize; uint8_t * data = this->inputBuffer.data; if (data == nullptr) { - data = - (uint8_t *)bare_aligned_malloc(this->format == SerializationFormat::unsafe_frozen_croaring ? 32 : 8, buffersize); - if (!data) { - return "RoaringBitmap32 serialization allocation failed"; - } + data = (uint8_t *)bare_aligned_malloc( + this->format == FileSerializationFormat::unsafe_frozen_croaring ? 32 : 8, this->serializedSize); this->allocatedBuffer = data; - } else if (this->inputBuffer.length < buffersize) { - return "RoaringBitmap32 serialization buffer is too small"; + } else if (this->inputBuffer.length < this->serializedSize) { + return WorkerError("RoaringBitmap32 serialization buffer is too small"); } - switch (format) { - case SerializationFormat::croaring: { - if (serializeArray) { - data[0] = CROARING_SERIALIZATION_ARRAY_UINT32; - memcpy(data + 1, &cardinality, sizeof(uint32_t)); - roaring_bitmap_to_uint32_array(self->roaring, (uint32_t *)(data + 1 + sizeof(uint32_t))); - } else { - data[0] = CROARING_SERIALIZATION_CONTAINER; - roaring_bitmap_portable_serialize(self->roaring, (char *)data + 1); - } - break; - } - - case SerializationFormat::portable: { - roaring_bitmap_portable_serialize(self->roaring, (char *)data); - break; - } - - case SerializationFormat::unsafe_frozen_croaring: { - roaring_bitmap_frozen_serialize(self->roaring, (char *)data); - break; - } - - default: return "RoaringBitmap32 serialization format is invalid"; - } - return nullptr; + return this->serializeToBuffer(data); 
} void done(v8::Isolate * isolate, v8::Local & result) { @@ -158,166 +194,244 @@ class RoaringBitmapSerializer final { ~RoaringBitmapSerializer() { bare_aligned_free(this->allocatedBuffer); } }; -class RoaringBitmapDeserializer final { +class RoaringBitmapFileSerializer final : public RoaringBitmapSerializerBase { public: - DeserializationFormat format = DeserializationFormat::INVALID; - v8::Isolate * isolate = nullptr; - - RoaringBitmap32 * targetBitmap = nullptr; - v8utils::TypedArrayContent inputBuffer; + std::string filePath; - roaring_bitmap_t_ptr volatile roaring = nullptr; - uint8_t * volatile frozenBuffer = nullptr; + void parseArguments(const v8::FunctionCallbackInfo & info) { + v8::Isolate * isolate = info.GetIsolate(); + v8::HandleScope scope(isolate); - ~RoaringBitmapDeserializer() { - if (this->frozenBuffer != nullptr) { - bare_aligned_free(this->frozenBuffer); + RoaringBitmap32 * bitmap = ObjectWrap::TryUnwrap(info.Holder(), isolate); + if (bitmap == nullptr) { + return v8utils::throwError(isolate, "RoaringBitmap32 serialization on invalid object"); } - if (this->roaring) { - roaring_bitmap_free(this->roaring); + if (info.Length() < 2) { + return v8utils::throwError(isolate, "RoaringBitmap32::serializeFileAsync requires 2 arguments"); + } + if (!info[0]->IsString()) { + return v8utils::throwError(isolate, "RoaringBitmap32 serialization file path argument was invalid"); } + + this->format = tryParseFileSerializationFormat(info[1], isolate); + if (this->format == FileSerializationFormat::INVALID) { + return v8utils::throwError(isolate, "RoaringBitmap32 serialization format argument was invalid"); + } + + v8::String::Utf8Value filePathUtf8(isolate, info[0]); + this->filePath = std::string(*filePathUtf8, filePathUtf8.length()); + this->self = bitmap; } - const char * setOutput(v8::Isolate * isolate, const v8::MaybeLocal & valueMaybe, DeserializationFormat format) { - this->isolate = isolate; - this->format = format; + WorkerError serialize() { + switch 
(this->format) { + case FileSerializationFormat::comma_separated_values: + case FileSerializationFormat::tab_separated_values: + case FileSerializationFormat::newline_separated_values: + case FileSerializationFormat::json_array: { + int fd = open(this->filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, 0666); + if (fd < 0) { + return WorkerError::from_errno("open", this->filePath); + } + int errorno = CsvFileDescriptorSerializer::iterate(this->self->roaring, fd, this->format); + close(fd); + return errorno != 0 ? WorkerError(errorno, "write", this->filePath) : WorkerError(); + } - if (valueMaybe.IsEmpty()) { - return nullptr; + default: break; } - v8::Local v; - if (!valueMaybe.ToLocal(&v) || v->IsNullOrUndefined()) { - return nullptr; + WorkerError err = this->computeSerializedSize(); + if (err.hasError()) { + return err; } - if (!this->inputBuffer.set(isolate, v)) { - return "RoaringBitmap32 deserialization output argument was not a valid typed array"; + int fd = open(this->filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, 0666); + if (fd < 0) { + return WorkerError::from_errno("open", this->filePath); } - return nullptr; - } +#if defined(_WIN32) || defined(__MINGW32__) || defined(__MINGW64__) + int truncateErr = _chsize_s(fd, this->serializedSize); + if (truncateErr != 0) { + err = WorkerError(truncateErr, "_chsize_s", this->filePath); + close(fd); + return err; + } +#else + if (ftruncate(fd, this->serializedSize) < 0) { + err = WorkerError::from_errno("ftruncate", this->filePath); + close(fd); + return err; + } +#endif - const char * parseArguments(const v8::FunctionCallbackInfo & info, bool isInstanceMethod) { - v8::Isolate * isolate = info.GetIsolate(); - this->isolate = isolate; - v8::HandleScope scope(isolate); + if (this->serializedSize != 0) { + uint8_t * data = (uint8_t *)mmap(nullptr, this->serializedSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (data == MAP_FAILED) { + // mmap failed, allocate and write to buffer instead + data = (uint8_t 
*)gcaware_aligned_malloc(32, this->serializedSize); + if (data) { + err = this->serializeToBuffer(data); + if (!err.hasError()) { + auto wresult = write(fd, data, this->serializedSize); + if (wresult < 0) { + err = WorkerError::from_errno("write", this->filePath); + close(fd); + } + } + gcaware_aligned_free(data); + return err; + } - if (isInstanceMethod) { - this->targetBitmap = ObjectWrap::TryUnwrap(info.Holder(), isolate); - if (this->targetBitmap == nullptr) { - return "RoaringBitmap32 deserialization on invalid object"; - } - if (this->targetBitmap->isFrozen()) { - return ERROR_FROZEN; + err = WorkerError::from_errno("mmap", this->filePath); + close(fd); + return err; } - } - if (info.Length() < 2) { - return "RoaringBitmap32 deserialization expects a format and a buffer arguments"; - } + err = this->serializeToBuffer(data); - int bufferArgIndex = 1; - DeserializationFormat fmt = tryParseDeserializationFormat(info[0], isolate); - if (fmt == DeserializationFormat::INVALID) { - bufferArgIndex = 0; - fmt = tryParseDeserializationFormat(info[1], isolate); - } - this->format = fmt; + if (err.hasError()) { + close(fd); + return err; + } - if ( - !info[bufferArgIndex]->IsNullOrUndefined() && - !this->inputBuffer.set(isolate, info[bufferArgIndex]->ToObject(isolate->GetCurrentContext()))) { - return "RoaringBitmap32 deserialization buffer argument was invalid"; + munmap(data, this->serializedSize); } - return nullptr; + close(fd); + return err; } +}; - const char * deserialize() { - if (this->format == DeserializationFormat::INVALID) { - return "RoaringBitmap32 deserialization format argument was invalid"; +class RoaringBitmapDeserializerBase { + public: + FileDeserializationFormat format = FileDeserializationFormat::INVALID; + v8::Isolate * isolate = nullptr; + roaring_bitmap_t_ptr volatile roaring = nullptr; + uint8_t * volatile frozenBuffer = nullptr; + + ~RoaringBitmapDeserializerBase() { + if (this->frozenBuffer != nullptr) { + 
bare_aligned_free(this->frozenBuffer); + } + if (this->roaring) { + roaring_bitmap_free(this->roaring); } + } - auto bufLen = this->inputBuffer.length; - const char * bufaschar = (const char *)this->inputBuffer.data; + WorkerError deserializeBuf(const char * bufaschar, size_t bufLen) { + if (this->format == FileDeserializationFormat::INVALID) { + return WorkerError("RoaringBitmap32 deserialization format argument was invalid"); + } if (bufLen == 0 || !bufaschar) { // Empty bitmap for an empty buffer. this->roaring = roaring_bitmap_create(); if (!this->roaring) { - return "RoaringBitmap32 deserialization failed to create an empty bitmap"; + return WorkerError("RoaringBitmap32 deserialization failed to create an empty bitmap"); } - roaring_bitmap_set_copy_on_write(this->roaring, true); - return nullptr; + return WorkerError(); } switch (this->format) { - case DeserializationFormat::portable: { + case FileDeserializationFormat::portable: { this->roaring = roaring_bitmap_portable_deserialize_safe(bufaschar, bufLen); if (!this->roaring) { - return "RoaringBitmap32::deserialize - portable deserialization failed"; + return WorkerError("RoaringBitmap32 deserialization - portable deserialization failed"); } - roaring_bitmap_set_copy_on_write(this->roaring, true); - return nullptr; + return WorkerError(); } - case DeserializationFormat::croaring: { + case FileDeserializationFormat::croaring: { switch ((unsigned char)bufaschar[0]) { case CROARING_SERIALIZATION_ARRAY_UINT32: { uint32_t card; memcpy(&card, bufaschar + 1, sizeof(uint32_t)); if (card * sizeof(uint32_t) + sizeof(uint32_t) + 1 != bufLen) { - return "RoaringBitmap32 deserialization corrupted data, wrong cardinality header"; + return WorkerError("RoaringBitmap32 deserialization corrupted data, wrong cardinality header"); } const uint32_t * elems = (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t)); this->roaring = roaring_bitmap_of_ptr(card, elems); if (!this->roaring) { - return "RoaringBitmap32 deserialization - 
uint32 array deserialization failed"; + return WorkerError("RoaringBitmap32 deserialization - uint32 array deserialization failed"); } - roaring_bitmap_set_copy_on_write(this->roaring, true); - return nullptr; + return WorkerError(); } case CROARING_SERIALIZATION_CONTAINER: { this->roaring = roaring_bitmap_portable_deserialize_safe(bufaschar + 1, bufLen - 1); if (!this->roaring) { - return "RoaringBitmap32 deserialization - container deserialization failed"; + return WorkerError("RoaringBitmap32 deserialization - container deserialization failed"); } - roaring_bitmap_set_copy_on_write(this->roaring, true); - return nullptr; + return WorkerError(); } } - return "RoaringBitmap32 deserialization - invalid portable header byte"; + return WorkerError("RoaringBitmap32 deserialization - invalid portable header byte"); } - case DeserializationFormat::unsafe_frozen_portable: - case DeserializationFormat::unsafe_frozen_croaring: { + case FileDeserializationFormat::unsafe_frozen_portable: + case FileDeserializationFormat::unsafe_frozen_croaring: { this->frozenBuffer = (uint8_t *)bare_aligned_malloc(32, bufLen); if (!this->frozenBuffer) { - return "RoaringBitmap32 deserialization - failed to allocate memory for frozen bitmap"; + return WorkerError("RoaringBitmap32 deserialization - failed to allocate memory for frozen bitmap"); } memcpy(this->frozenBuffer, bufaschar, bufLen); - if (format == DeserializationFormat::unsafe_frozen_croaring) { + if (format == FileDeserializationFormat::unsafe_frozen_croaring) { this->roaring = const_cast(roaring_bitmap_frozen_view((const char *)this->frozenBuffer, bufLen)); - return this->roaring ? nullptr : "RoaringBitmap32 deserialization - failed to create a frozen view"; + return this->roaring ? 
WorkerError() + : WorkerError("RoaringBitmap32 deserialization - failed to create a frozen view"); } this->roaring = const_cast(roaring_bitmap_portable_deserialize_frozen((const char *)this->frozenBuffer)); if (!this->roaring) { - return "RoaringBitmap32 deserialization - failed to create a frozen view"; + return WorkerError("RoaringBitmap32 deserialization - failed to create a frozen view"); } - roaring_bitmap_set_copy_on_write(this->roaring, true); - return nullptr; + return WorkerError(); } - default: return "RoaringBitmap32::deserialize - unknown deserialization format"; + case FileDeserializationFormat::uint32_array: { + if (bufLen % 4 != 0) { + return WorkerError( + "RoaringBitmap32 deserialization - uint32 array deserialization failed, input length is not a multiple of 4"); + } + + if (bufLen == 0) { + this->roaring = roaring_bitmap_create(); + if (!this->roaring) { + return WorkerError("RoaringBitmap32 deserialization failed to create an empty bitmap"); + } + return WorkerError(); + } + + this->roaring = roaring_bitmap_of_ptr(bufLen >> 2, (const uint32_t *)bufaschar); + if (!this->roaring) { + return WorkerError("RoaringBitmap32 deserialization - uint32 array deserialization failed"); + } + return WorkerError(); + } + + case FileDeserializationFormat::comma_separated_values: + case FileDeserializationFormat::tab_separated_values: + case FileDeserializationFormat::newline_separated_values: + case FileDeserializationFormat::json_array: { + this->roaring = roaring_bitmap_create(); + if (!this->roaring) { + return WorkerError("RoaringBitmap32 deserialization failed to create an empty bitmap"); + } + if (bufaschar != nullptr) { + return deserializeRoaringCsvFile(this->roaring, -1, bufaschar, bufLen, ""); + } + return WorkerError(); + } + + default: return WorkerError("RoaringBitmap32 deserialization - unknown deserialization format"); } } @@ -334,4 +448,172 @@ class RoaringBitmapDeserializer final { } }; +class RoaringBitmapDeserializer final : public 
RoaringBitmapDeserializerBase { + public: + RoaringBitmap32 * targetBitmap = nullptr; + v8utils::TypedArrayContent inputBuffer; + + WorkerError setOutput( + v8::Isolate * isolate, const v8::MaybeLocal & valueMaybe, FileDeserializationFormat format) { + this->isolate = isolate; + this->format = format; + + if (valueMaybe.IsEmpty()) { + return WorkerError(); + } + + v8::Local v; + if (!valueMaybe.ToLocal(&v) || v->IsNullOrUndefined()) { + return WorkerError(); + } + + if (!this->inputBuffer.set(isolate, v)) { + return WorkerError("RoaringBitmap32 deserialization output argument was not a valid typed array"); + } + + return WorkerError(); + } + + WorkerError parseArguments(const v8::FunctionCallbackInfo & info, bool isInstanceMethod) { + v8::Isolate * isolate = info.GetIsolate(); + this->isolate = isolate; + v8::HandleScope scope(isolate); + + if (isInstanceMethod) { + this->targetBitmap = ObjectWrap::TryUnwrap(info.Holder(), isolate); + if (this->targetBitmap == nullptr) { + return WorkerError("RoaringBitmap32 deserialization on invalid object"); + } + if (this->targetBitmap->isFrozen()) { + return WorkerError(ERROR_FROZEN); + } + } + + if (info.Length() < 2) { + return WorkerError("RoaringBitmap32 deserialization expects a format and a buffer arguments"); + } + + int bufferArgIndex = 1; + DeserializationFormat fmt = tryParseDeserializationFormat(info[0], isolate); + if (fmt == DeserializationFormat::INVALID) { + bufferArgIndex = 0; + fmt = tryParseDeserializationFormat(info[1], isolate); + } + this->format = static_cast(fmt); + + if ( + !info[bufferArgIndex]->IsNullOrUndefined() && + !this->inputBuffer.set(isolate, info[bufferArgIndex]->ToObject(isolate->GetCurrentContext()))) { + return WorkerError("RoaringBitmap32 deserialization buffer argument was invalid"); + } + + return WorkerError(); + } + + WorkerError deserialize() { return this->deserializeBuf((const char *)this->inputBuffer.data, this->inputBuffer.length); } +}; + +class RoaringBitmapFileDeserializer 
final : public RoaringBitmapDeserializerBase { + public: + std::string filePath; + + WorkerError parseArguments(const v8::FunctionCallbackInfo & info) { + v8::Isolate * isolate = info.GetIsolate(); + this->isolate = isolate; + v8::HandleScope scope(isolate); + + if (info.Length() < 2) { + return WorkerError("RoaringBitmap32::deserializeFileAsync expects a file path and format"); + } + + if (!info[0]->IsString()) { + return WorkerError("RoaringBitmap32::deserializeFileAsync expects a file path as the first argument"); + } + + v8::String::Utf8Value filePathUtf8(isolate, info[0]); + this->filePath = std::string(*filePathUtf8, filePathUtf8.length()); + + FileDeserializationFormat fmt = tryParseFileDeserializationFormat(info[1], isolate); + if (fmt == FileDeserializationFormat::INVALID) { + return WorkerError("RoaringBitmap32::deserializeFileAsync invalid format"); + } + this->format = fmt; + return WorkerError(); + } + + WorkerError deserialize() { + int fd = open(this->filePath.c_str(), O_RDONLY); + if (fd == -1) { + return WorkerError::from_errno("open", this->filePath); + } + + switch (this->format) { + case FileDeserializationFormat::comma_separated_values: + case FileDeserializationFormat::tab_separated_values: + case FileDeserializationFormat::newline_separated_values: + case FileDeserializationFormat::json_array: { + this->roaring = roaring_bitmap_create(); + if (!this->roaring) { + return WorkerError("RoaringBitmap32 deserialization failed to create an empty bitmap"); + } + WorkerError err = deserializeRoaringCsvFile(this->roaring, fd, nullptr, 0, this->filePath); + close(fd); + return err; + } + + default: break; + } + + struct stat st; + if (fstat(fd, &st) == -1) { + WorkerError err = WorkerError::from_errno("fstat", this->filePath); + close(fd); + return err; + } + + size_t fileSize = st.st_size; + + if (fileSize == 0) { + WorkerError err = this->deserializeBuf(nullptr, 0); + close(fd); + return err; + } + + void * buf = mmap(nullptr, fileSize, PROT_READ, 
MAP_SHARED, fd, 0); + if (buf == MAP_FAILED) { + // mmap failed, try to read the file into a buffer + buf = gcaware_aligned_malloc(32, fileSize); + if (buf != nullptr) { + ssize_t bytesRead = read(fd, buf, fileSize); + if (bytesRead == -1) { + WorkerError err = WorkerError::from_errno("read", this->filePath); + close(fd); + gcaware_aligned_free(buf); + return err; + } + if ((size_t)bytesRead != fileSize) { + WorkerError err = WorkerError("RoaringBitmap32::deserializeFileAsync read less bytes than expected"); + close(fd); + gcaware_aligned_free(buf); + return err; + } + WorkerError err = this->deserializeBuf((const char *)buf, fileSize); + gcaware_aligned_free(buf); + close(fd); + return err; + } + + WorkerError err = WorkerError::from_errno("mmap", this->filePath); + close(fd); + return err; + } + + WorkerError err = this->deserializeBuf((const char *)buf, fileSize); + + munmap(buf, fileSize); + close(fd); + return err; + } +}; + #endif // ROARING_NODE_SERIALIZATION_ diff --git a/submodules/CRoaring b/submodules/CRoaring index d2bf554..5d6dd23 160000 --- a/submodules/CRoaring +++ b/submodules/CRoaring @@ -1 +1 @@ -Subproject commit d2bf554494b084d7f3993d01b8c6effc0ff84b16 +Subproject commit 5d6dd2342d9e3ffaf481aa5ebe344e19984faa4a diff --git a/test/RoaringBitmap32/RoaringBitmap32.frozen.test.ts b/test/RoaringBitmap32/RoaringBitmap32.frozen.test.ts index f5f37cb..9739e13 100644 --- a/test/RoaringBitmap32/RoaringBitmap32.frozen.test.ts +++ b/test/RoaringBitmap32/RoaringBitmap32.frozen.test.ts @@ -1,9 +1,23 @@ +import { FrozenViewFormat } from "../.."; import RoaringBitmap32 from "../../RoaringBitmap32"; import { expect } from "chai"; const ERROR_FROZEN = "This bitmap is frozen and cannot be modified"; describe("RoaringBitmap32 frozen", () => { + describe("FrozenViewFormat", () => { + it("should have the right values", () => { + expect(FrozenViewFormat.unsafe_frozen_croaring).eq("unsafe_frozen_croaring"); + 
expect(FrozenViewFormat.unsafe_frozen_portable).eq("unsafe_frozen_portable"); + + expect(Object.values(FrozenViewFormat)).to.deep.eq(["unsafe_frozen_croaring", "unsafe_frozen_portable"]); + + expect(RoaringBitmap32.FrozenViewFormat).to.eq(FrozenViewFormat); + + expect(new RoaringBitmap32().FrozenViewFormat).to.eq(FrozenViewFormat); + }); + }); + describe("freeze", () => { it("set isFrozen to true, return this, can be called multiple times", () => { const bitmap = new RoaringBitmap32(); diff --git a/test/RoaringBitmap32/RoaringBitmap32.serialization-file.test.ts b/test/RoaringBitmap32/RoaringBitmap32.serialization-file.test.ts new file mode 100644 index 0000000..ffa60b7 --- /dev/null +++ b/test/RoaringBitmap32/RoaringBitmap32.serialization-file.test.ts @@ -0,0 +1,180 @@ +import RoaringBitmap32 from "../../RoaringBitmap32"; +import { expect, use as chaiUse } from "chai"; +import path from "path"; +import fs from "fs"; +import type { FileSerializationDeserializationFormatType } from "../.."; +import { FileDeserializationFormat, FileSerializationFormat } from "../.."; + +const tmpDir = path.resolve(__dirname, "..", "..", ".tmp", "tests"); + +chaiUse(require("chai-as-promised")); + +describe("RoaringBitmap32 file serialization", () => { + before(() => { + if (!fs.existsSync(tmpDir)) { + fs.mkdirSync(tmpDir, { recursive: true }); + } + }); + + describe("FileSerializationFormat", () => { + it("should have the right values", () => { + expect(FileSerializationFormat.croaring).eq("croaring"); + expect(FileSerializationFormat.portable).eq("portable"); + expect(FileSerializationFormat.uint32_array).eq("uint32_array"); + expect(FileSerializationFormat.unsafe_frozen_croaring).eq("unsafe_frozen_croaring"); + expect(FileSerializationFormat.comma_separated_values).eq("comma_separated_values"); + expect(FileSerializationFormat.tab_separated_values).eq("tab_separated_values"); + expect(FileSerializationFormat.newline_separated_values).eq("newline_separated_values"); + 
expect(FileSerializationFormat.json_array).eq("json_array"); + + expect(Object.values(FileSerializationFormat)).to.deep.eq([ + "croaring", + "portable", + "unsafe_frozen_croaring", + "uint32_array", + "comma_separated_values", + "tab_separated_values", + "newline_separated_values", + "json_array", + ]); + + expect(RoaringBitmap32.FileSerializationFormat).to.eq(FileSerializationFormat); + + expect(new RoaringBitmap32().FileSerializationFormat).to.eq(FileSerializationFormat); + }); + }); + + describe("FileDeserializationFormat", () => { + it("should have the right values", () => { + expect(FileDeserializationFormat.croaring).eq("croaring"); + expect(FileDeserializationFormat.portable).eq("portable"); + expect(FileDeserializationFormat.unsafe_frozen_croaring).eq("unsafe_frozen_croaring"); + expect(FileDeserializationFormat.unsafe_frozen_portable).eq("unsafe_frozen_portable"); + expect(FileDeserializationFormat.comma_separated_values).eq("comma_separated_values"); + expect(FileDeserializationFormat.tab_separated_values).eq("tab_separated_values"); + expect(FileDeserializationFormat.newline_separated_values).eq("newline_separated_values"); + expect(FileDeserializationFormat.json_array).eq("json_array"); + + expect(Object.values(FileDeserializationFormat)).to.deep.eq([ + "croaring", + "portable", + "unsafe_frozen_croaring", + "unsafe_frozen_portable", + "uint32_array", + "comma_separated_values", + "tab_separated_values", + "newline_separated_values", + "json_array", + ]); + + expect(RoaringBitmap32.FileDeserializationFormat).to.eq(FileDeserializationFormat); + + expect(new RoaringBitmap32().FileDeserializationFormat).to.eq(FileDeserializationFormat); + }); + }); + + it("serialize and deserialize empty bitmaps in various formats", async () => { + const formats: FileSerializationDeserializationFormatType[] = [ + "portable", + "croaring", + "unsafe_frozen_croaring", + "uint32_array", + "comma_separated_values", + "tab_separated_values", + "newline_separated_values", + 
"json_array", + ]; + for (const format of formats) { + const tmpFilePath = path.resolve(tmpDir, `test-ϴϮ-${format}.bin`); + await new RoaringBitmap32().serializeFileAsync(tmpFilePath, format); + expect((await RoaringBitmap32.deserializeFileAsync(tmpFilePath, format)).toArray()).to.deep.equal([]); + } + }); + + it("serialize and deserialize in various formats", async () => { + for (const format of ["portable", "croaring", "unsafe_frozen_croaring", "uint32_array"] as const) { + const tmpFilePath = path.resolve(tmpDir, `test-ϴϮ-${format}.bin`); + const data = [1, 2, 3, 100, 0xfffff, 0xffffffff]; + await new RoaringBitmap32(data).serializeFileAsync(tmpFilePath, format); + expect((await RoaringBitmap32.deserializeFileAsync(tmpFilePath, format)).toArray()).to.deep.equal(data); + } + }); + + it("serializeFileAsync truncates file if it already exists", async () => { + const tmpFilePath = path.resolve(tmpDir, `test-truncate.bin`); + await fs.promises.writeFile(tmpFilePath, Buffer.alloc(10000)); + await new RoaringBitmap32([1, 2, 3]).serializeFileAsync(tmpFilePath, "portable"); + expect((await RoaringBitmap32.deserializeFileAsync(tmpFilePath, "portable")).toArray()).to.deep.equal([1, 2, 3]); + }); + + it("throws ENOENT if file does not exist", async () => { + const tmpFilePath = path.resolve(tmpDir, `test-ENOENT.bin`); + let error: any; + try { + await RoaringBitmap32.deserializeFileAsync(tmpFilePath, "portable"); + } catch (e) { + error = e; + } + expect(error).to.be.an.instanceOf(Error); + expect(error.message).to.match(/^ENOENT, No such file or directory/); + expect(error.code).to.equal("ENOENT"); + expect(error.syscall).to.equal("open"); + expect(error.path).to.equal(tmpFilePath); + }); + + it("serializes to comma_separated_values", async () => { + const tmpFilePath = path.resolve(tmpDir, `test-csv.csv`); + const bmp = new RoaringBitmap32([1, 2, 3, 100, 14120, 3481983]); + bmp.addRange(0x100, 0x120); + await bmp.serializeFileAsync(tmpFilePath, "comma_separated_values"); 
+ const text = await fs.promises.readFile(tmpFilePath, "utf8"); + expect(text).to.equal( + "1,2,3,100,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,14120,3481983", + ); + }); + + it("serializes to newline_separated_values", async () => { + const tmpFilePath = path.resolve(tmpDir, `test-nlf.csv`); + const bmp = new RoaringBitmap32([1, 2, 3, 100, 14120, 3481983]); + bmp.addRange(0x100, 0x120); + await bmp.serializeFileAsync(tmpFilePath, "newline_separated_values"); + const text = await fs.promises.readFile(tmpFilePath, "utf8"); + expect(text).to.equal( + "1,2,3,100,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,14120,3481983,".replace( + /,/g, + "\n", + ), + ); + }); + + it("serializes to tab_separated_values", async () => { + const tmpFilePath = path.resolve(tmpDir, `test-ntab.csv`); + const bmp = new RoaringBitmap32([1, 2, 3, 100, 14120, 3481983]); + bmp.addRange(0x100, 0x120); + await bmp.serializeFileAsync(tmpFilePath, "tab_separated_values"); + const text = await fs.promises.readFile(tmpFilePath, "utf8"); + expect(text).to.equal( + "1,2,3,100,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,14120,3481983".replace( + /,/g, + "\t", + ), + ); + }); + + it("serializes to an empty json array", async () => { + const tmpFilePath = path.resolve(tmpDir, `test-json-array-empty.csv`); + await new RoaringBitmap32().serializeFileAsync(tmpFilePath, "json_array"); + expect(await fs.promises.readFile(tmpFilePath, "utf8")).to.equal("[]"); + }); + + it("serializes to a json array", async () => { + const tmpFilePath = path.resolve(tmpDir, `test-json-array.csv`); + const bmp = new RoaringBitmap32([1, 2, 3, 100, 14120, 3481983]); + bmp.addRange(0x100, 0x120); + await bmp.serializeFileAsync(tmpFilePath, "json_array"); + const text = await 
fs.promises.readFile(tmpFilePath, "utf8"); + expect(text).to.equal( + "[1,2,3,100,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,14120,3481983]", + ); + }); +}); diff --git a/test/RoaringBitmap32/RoaringBitmap32.serialization.test.ts b/test/RoaringBitmap32/RoaringBitmap32.serialization.test.ts index a58b6f6..562d1b7 100644 --- a/test/RoaringBitmap32/RoaringBitmap32.serialization.test.ts +++ b/test/RoaringBitmap32/RoaringBitmap32.serialization.test.ts @@ -1,9 +1,59 @@ import RoaringBitmap32 from "../../RoaringBitmap32"; +import { DeserializationFormat, SerializationFormat } from "../.."; import { expect } from "chai"; describe("RoaringBitmap32 serialization", () => { const data = [1, 2, 3, 4, 5, 6, 100, 101, 105, 109, 0x7fffffff, 0xfffffffe, 0xffffffff]; + describe("SerializationFormat", () => { + it("should have the right values", () => { + expect(SerializationFormat.croaring).eq("croaring"); + expect(SerializationFormat.portable).eq("portable"); + expect(SerializationFormat.unsafe_frozen_croaring).eq("unsafe_frozen_croaring"); + expect(SerializationFormat.uint32_array).eq("uint32_array"); + + expect(Object.values(SerializationFormat)).to.deep.eq([ + "croaring", + "portable", + "unsafe_frozen_croaring", + "uint32_array", + ]); + + expect(RoaringBitmap32.SerializationFormat).to.eq(SerializationFormat); + + expect(new RoaringBitmap32().SerializationFormat).to.eq(SerializationFormat); + }); + }); + + describe("DeserializationFormat", () => { + it("should have the right values", () => { + expect(DeserializationFormat.croaring).eq("croaring"); + expect(DeserializationFormat.portable).eq("portable"); + expect(DeserializationFormat.unsafe_frozen_croaring).eq("unsafe_frozen_croaring"); + expect(DeserializationFormat.unsafe_frozen_portable).eq("unsafe_frozen_portable"); + expect(DeserializationFormat.comma_separated_values).eq("comma_separated_values"); + 
expect(DeserializationFormat.tab_separated_values).eq("tab_separated_values"); + expect(DeserializationFormat.newline_separated_values).eq("newline_separated_values"); + expect(DeserializationFormat.json_array).eq("json_array"); + + expect(Object.values(DeserializationFormat)).to.deep.eq([ + "croaring", + "portable", + "unsafe_frozen_croaring", + "unsafe_frozen_portable", + "uint32_array", + "comma_separated_values", + "tab_separated_values", + "newline_separated_values", + "json_array", + ]); + + expect(RoaringBitmap32.DeserializationFormat).to.eq(DeserializationFormat); + + expect(new RoaringBitmap32().DeserializationFormat).to.eq(DeserializationFormat); + }); + }); + describe("getSerializationSizeInBytes", () => { it("throws if the argument is not a valid format", () => { const bitmap = new RoaringBitmap32(data); @@ -323,4 +373,31 @@ describe("RoaringBitmap32 serialization", () => { expect(RoaringBitmap32.deserialize(Buffer.from(buffer.buffer, 10), true).toArray()).to.deep.eq(data); }); }); + + it("serialize and deserialize empty bitmaps in various formats", async () => { + for (const format of ["portable", "croaring", "unsafe_frozen_croaring", "uint32_array"] as const) { + const serialized = await new RoaringBitmap32().serializeAsync(format); + expect((await RoaringBitmap32.deserializeAsync(serialized, format)).toArray()).to.deep.equal([]); + } + }); + + it("serialize and deserialize in various formats", async () => { + for (const format of ["portable", "croaring", "unsafe_frozen_croaring"] as const) { + const smallArray = [1, 2, 3, 100, 0xfffff, 0xffffffff]; + const serialized = await new RoaringBitmap32(smallArray).serializeAsync(format); + expect((await RoaringBitmap32.deserializeAsync(serialized, format)).toArray()).to.deep.equal(smallArray); + } + }); + + it("deserializes text", () => { + for (const fmt of [ + "comma_separated_values", + "tab_separated_values", + "newline_separated_values", + "json_array", + ] as const) { + const bitmap = 
RoaringBitmap32.deserialize(Buffer.from("1, 2,\n3\t4\n5, 6 ,8 9 10 - 100 -101 102"), fmt); + expect(bitmap.toArray()).to.deep.eq([1, 2, 3, 4, 5, 6, 8, 9, 10, 100, 102]); + } + }); }); diff --git a/test/roaring.test.ts b/test/roaring.test.ts index 8590d14..64d57d6 100644 --- a/test/roaring.test.ts +++ b/test/roaring.test.ts @@ -1,4 +1,4 @@ -import roaring, { DeserializationFormat, FrozenViewFormat, SerializationFormat } from ".."; +import roaring from ".."; import { expect } from "chai"; import RoaringBitmap32 from "../RoaringBitmap32"; @@ -43,51 +43,4 @@ describe("roaring", () => { expect(Number.isInteger(Number.parseInt(values[i], 10))).eq(true); } }); - - describe("SerializationFormat", () => { - it("should have the right values", () => { - expect(SerializationFormat.croaring).eq("croaring"); - expect(SerializationFormat.portable).eq("portable"); - expect(SerializationFormat.unsafe_frozen_croaring).eq("unsafe_frozen_croaring"); - - expect(Object.values(SerializationFormat)).to.deep.eq(["croaring", "portable", "unsafe_frozen_croaring"]); - - expect(RoaringBitmap32.SerializationFormat).to.eq(SerializationFormat); - - expect(new RoaringBitmap32().SerializationFormat).to.eq(SerializationFormat); - }); - }); - - describe("DeserializationFormat", () => { - it("should have the right values", () => { - expect(DeserializationFormat.croaring).eq("croaring"); - expect(DeserializationFormat.portable).eq("portable"); - expect(DeserializationFormat.unsafe_frozen_croaring).eq("unsafe_frozen_croaring"); - expect(DeserializationFormat.unsafe_frozen_portable).eq("unsafe_frozen_portable"); - - expect(Object.values(DeserializationFormat)).to.deep.eq([ - "croaring", - "portable", - "unsafe_frozen_croaring", - "unsafe_frozen_portable", - ]); - - expect(RoaringBitmap32.DeserializationFormat).to.eq(DeserializationFormat); - - expect(new RoaringBitmap32().DeserializationFormat).to.eq(DeserializationFormat); - }); - }); - - describe("FrozenViewFormat", () => { - it("should have the right 
values", () => { - expect(FrozenViewFormat.unsafe_frozen_croaring).eq("unsafe_frozen_croaring"); - expect(FrozenViewFormat.unsafe_frozen_portable).eq("unsafe_frozen_portable"); - - expect(Object.values(FrozenViewFormat)).to.deep.eq(["unsafe_frozen_croaring", "unsafe_frozen_portable"]); - - expect(RoaringBitmap32.FrozenViewFormat).to.eq(FrozenViewFormat); - - expect(new RoaringBitmap32().FrozenViewFormat).to.eq(FrozenViewFormat); - }); - }); }); diff --git a/tsconfig.json b/tsconfig.json index 6e2f7eb..b7e39aa 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -4,13 +4,13 @@ "checkJs": false, "noEmit": true, "allowSyntheticDefaultImports": true, + "esModuleInterop": true, "allowUnreachableCode": false, "allowUnusedLabels": false, "alwaysStrict": true, "baseUrl": ".", "declaration": true, "emitDecoratorMetadata": false, - "esModuleInterop": false, "experimentalDecorators": false, "forceConsistentCasingInFileNames": true, "importHelpers": false,