From 3bdd20bc632dd0186c538d395ed6cce26578f565 Mon Sep 17 00:00:00 2001
From: Przemyslaw Motacki
Date: Thu, 14 Sep 2023 16:38:43 +0200
Subject: [PATCH] SNOW-811103: Add sample with test of json parsers

---
 .npmignore                      |   1 +
 samples/README.md               |  55 +++++++++
 samples/helpers.js              |  78 ++++++++++++
 samples/jsonParserComparison.js | 311 ++++++++++++++++++++++++++++++++++++++++
 samples/package.json            |  13 +++
 5 files changed, 458 insertions(+)
 create mode 100644 samples/README.md
 create mode 100644 samples/helpers.js
 create mode 100644 samples/jsonParserComparison.js
 create mode 100644 samples/package.json

diff --git a/.npmignore b/.npmignore
index 3580c3f74..b96ffc12e 100644
--- a/.npmignore
+++ b/.npmignore
@@ -13,6 +13,7 @@ snowflake-sdk*.tgz
 coverage
 system_test/
 scripts/
+samples/
 ci/
 .github/
 .eslintrc.js
diff --git a/samples/README.md b/samples/README.md
new file mode 100644
index 000000000..eb5b4fa84
--- /dev/null
+++ b/samples/README.md
@@ -0,0 +1,55 @@
+********************************************************************************
+NodeJS Driver - Samples
+********************************************************************************
+
+Install
+======================================================================
+
+In directory samples run `npm i`.
+
+Test
+======================================================================
+
+Prepare for tests
+----------------------------------------------------------------------
+
+Specify env variables:
+
+```
+export SNOWFLAKE_TEST_USER=
+export SNOWFLAKE_TEST_PASSWORD=
+export SNOWFLAKE_TEST_ACCOUNT=
+export SNOWFLAKE_TEST_WAREHOUSE=
+export SNOWFLAKE_TEST_DATABASE=
+export SNOWFLAKE_TEST_SCHEMA=
+export SNOWFLAKE_TEST_ROLE=
+export SNOWFLAKE_TEST_PROTOCOL=
+export SNOWFLAKE_TEST_HOST=
+export SNOWFLAKE_TEST_PORT=
+```
+
+Run test to compare json parsers
+----------------------------------------------------------------------
+
+By default, the test creates a table with 300000 rows of sample variant data (json format)
+and measures the time and number of event loop blocks while retrieving the results using two
+different methods to extract data.
+1. Streaming results: `stream.on('readable', ...)`
+2. Events results: `stream.on('data', ...)`
+```
+npm run jsonParserComparison
+```
+Test can be started with parameters:
+ - number of rows in table, default=300000
+ - number of selected rows, default=300000
+ - parser to test (optional, last parameter): Function, vm, better-eval or JSON; by default all parsers are tested
+
+Example:
+```
+npm run jsonParserComparison 300000 300000 Function
+```
+
+or
+```
+npm run jsonParserComparison 300000 300000 JSON
+```
diff --git a/samples/helpers.js b/samples/helpers.js
new file mode 100644
index 000000000..1d8c0e0b8
--- /dev/null
+++ b/samples/helpers.js
@@ -0,0 +1,78 @@
+const snowflake = require('snowflake-sdk');
+
+/**
+ * Executes a single SQL statement and resolves with the rows it produced.
+ *
+ * @param {Object} connection - connected snowflake-sdk connection
+ * @param {string} query - SQL text to execute
+ * @param {Array} [binds] - optional bind values for the statement
+ * @returns {Promise<Array>} rows handed to the driver's `complete` callback
+ */
+exports.executeQuery = async function (connection, query, binds) {
+  // The promise is returned (not only awaited) so callers receive the rows.
+  return new Promise((resolve, reject) => {
+    connection.execute({
+      sqlText: query,
+      binds: binds,
+      complete: function (err, stmt, rows) {
+        if (err) {
+          reject(err);
+        } else {
+          resolve(rows);
+        }
+      }
+    });
+  });
+};
+
+/**
+ * Creates a connection configured from the SNOWFLAKE_TEST_* environment
+ * variables and connects it.
+ *
+ * @returns {Promise<Object>} the connected snowflake-sdk connection
+ */
+exports.connectUsingEnv = async () => {
+  const connection = snowflake.createConnection({
+    account: process.env.SNOWFLAKE_TEST_ACCOUNT,
+    username: process.env.SNOWFLAKE_TEST_USER,
+    password: process.env.SNOWFLAKE_TEST_PASSWORD,
+    role: process.env.SNOWFLAKE_TEST_ROLE,
+    database: process.env.SNOWFLAKE_TEST_DATABASE,
+    schema: process.env.SNOWFLAKE_TEST_SCHEMA,
+    warehouse: process.env.SNOWFLAKE_TEST_WAREHOUSE,
+    host: process.env.SNOWFLAKE_TEST_HOST,
+    port: process.env.SNOWFLAKE_TEST_PORT,
+    protocol: process.env.SNOWFLAKE_TEST_PROTOCOL
+  });
+
+  return new Promise((resolve, reject) => {
+    connection.connect((err, conn) => {
+      if (err) {
+        console.error('Unable to connect: ' + err.message);
+        // Reject with the driver's own error so its code and stack are kept.
+        reject(err);
+      } else {
+        console.log('Successfully connected to Snowflake');
+        resolve(conn);
+      }
+    });
+  });
+};
+
+/**
+ * Closes a connection opened with connectUsingEnv.
+ *
+ * @param {Object} connection - snowflake-sdk connection to close
+ * @returns {Promise<void>} settles once the driver reports the result
+ */
+exports.destroyConnection = (connection) => {
+  return new Promise((resolve, reject) => {
+    connection.destroy((err) => {
+      if (err) {
+        reject(err);
+      } else {
+        resolve();
+      }
+    });
+  });
+};
diff --git a/samples/jsonParserComparison.js b/samples/jsonParserComparison.js
new file mode 100644
index 000000000..de0025758
--- /dev/null
+++ b/samples/jsonParserComparison.js
@@ -0,0 +1,311 @@
+const vm = require('vm');
+const snowflake = require('snowflake-sdk');
+const helpers = require('./helpers');
+const blocked = require('blocked-at');
+
+const DEFAULT_ROW_COUNT = 300000;
+const PROGRESS_STEP = 10000;
+const TEMP_TABLE_NAME = 'testJsonTempTable000';
+
+/**
+ * Reads a row count from a command line argument.
+ *
+ * The value is interpolated into SQL text, so anything that is not a plain
+ * non-negative integer is rejected instead of being passed through.
+ *
+ * @param {string|undefined} rawValue - argument as given on the command line
+ * @param {string} label - name used in the error message
+ * @returns {number} parsed count, or DEFAULT_ROW_COUNT when no value was given
+ * @throws {Error} when the value is not a non-negative integer
+ */
+function parseRowCount(rawValue, label) {
+  if (rawValue === undefined || rawValue === '') {
+    return DEFAULT_ROW_COUNT;
+  }
+  if (!/^\d+$/.test(rawValue)) {
+    throw new Error(`${label} must be a non-negative integer, got: ${rawValue}`);
+  }
+  return Number(rawValue);
+}
+
+/**
+ * Builds the statement that creates the source table filled with `rowCount`
+ * copies of one sample JSON document.
+ *
+ * @param {number} rowCount - number of rows to generate
+ * @returns {string} SQL text
+ */
+function createTempTableSql(rowCount) {
+  return `CREATE OR REPLACE TABLE ${TEMP_TABLE_NAME} (value string)
+    AS select parse_json('{
+      "_id": "6501c357397b66ce47719212",
+      "index": 0,
+      "guid": "e7e0e5d8-82b4-47f7-a2ab-68588c93d81e",
+      "isActive": false,
+      "balance": "$2,611.69",
+      "picture": "http://placehold.it/32x32",
+      "age": 21,
+      "eyeColor": "blue",
+      "name": "Joanna Atkinson",
+      "gender": "female",
+      "company": "AQUAZURE",
+      "email": "joannaatkinson@aquazure.com",
+      "phone": "+1 (925) 582-3869",
+      "address": "395 Karweg Place, Garnet, Mississippi, 9481",
+      "registered": "2017-05-18T11:16:33 -02:00",
+      "latitude": 21.372656,
+      "longitude": -24.488326,
+      "tags": [
+        "aliquip",
+        "aliqua",
+        "magna",
+        "pariatur",
+        "cillum",
+        "esse",
+        "nisi"
+      ],
+      "friends": [
+        {
+          "id": 0,
+          "name": "Davis Blake"
+        },
+        {
+          "id": 1,
+          "name": "Raymond Jefferson"
+        },
+        {
+          "id": 2,
+          "name": "Hoffman Roberts"
+        }
+      ],
+      "greeting": "Hello, Joanna Atkinson! You have 3 unread messages.",
+      "favoriteFruit": "apple"
+    }')
+    from table(generator(rowcount=>${rowCount}))`;
+}
+
+/**
+ * Lists the parsers to compare.
+ *
+ * @param {string|undefined} chosenParser - command line selection; a parser is
+ *   tested when its name occurs in this value, all parsers when it is not given
+ * @returns {Array<Object>} test cases with `parser` and `jsonColumnVariantParser`
+ */
+function buildTestCases(chosenParser) {
+  const isSelected = (name) => !chosenParser || chosenParser.includes(name);
+  const testCases = [];
+  if (isSelected('Function')) {
+    testCases.push({
+      parser: 'Function',
+      // The generated function must be invoked; otherwise the "parsed" value is
+      // the function itself and evaluating the JSON is never measured.
+      jsonColumnVariantParser: (rawColumnValue) => new Function(`return (${rawColumnValue})`)()
+    });
+  }
+  if (isSelected('better-eval')) {
+    // Required only when selected and once, not on every parsed value.
+    const betterEval = require('better-eval');
+    testCases.push({
+      parser: 'betterEval',
+      // NOTE(review): `.call(code)` passes the code as `this` - confirm against the better-eval API.
+      jsonColumnVariantParser: (rawColumnValue) => betterEval.call('(' + rawColumnValue + ')')
+    });
+  }
+  if (isSelected('vm')) {
+    testCases.push({
+      parser: 'vm',
+      jsonColumnVariantParser: (rawColumnValue) => vm.runInNewContext('(' + rawColumnValue + ')')
+    });
+  }
+  // The eval parser is left out on purpose: it contains a vulnerability.
+  if (isSelected('JSON')) {
+    testCases.push({
+      parser: 'JSON',
+      jsonColumnVariantParser: (rawColumnValue) => JSON.parse(rawColumnValue)
+    });
+  }
+  return testCases;
+}
+
+/**
+ * Creates a callback that counts rows and logs progress every PROGRESS_STEP rows.
+ *
+ * @returns {Function} callback to invoke once per row
+ */
+function createRowCounter() {
+  let count = 0;
+  return () => {
+    count++;
+    if (count % PROGRESS_STEP === 0) {
+      console.log(`Parsed rows: ${count}`);
+    }
+  };
+}
+
+/**
+ * Consumes the stream through 'data' events.
+ *
+ * @param {Object} stream - row stream returned by stmt.streamRows()
+ */
+function extractOnData(stream) {
+  stream.on('data', createRowCounter());
+}
+
+/**
+ * Consumes the stream by calling read() on 'readable' events.
+ *
+ * @param {Object} stream - row stream returned by stmt.streamRows()
+ */
+function extractOnStream(stream) {
+  const countRow = createRowCounter();
+  stream.on('readable', () => {
+    while (stream.read() !== null) {
+      countRow();
+    }
+  });
+}
+
+/**
+ * Streams the rows of `tableName` and prints the elapsed time together with
+ * the event loop block statistics collected by blocked-at.
+ *
+ * @param {Object} connection - connected snowflake-sdk connection
+ * @param {string} tableName - table to select from
+ * @param {number} selectLimit - maximum number of rows to fetch
+ * @param {string} parser - parser label used in the output
+ * @param {Function} extractFunction - consumer attached to the row stream
+ * @returns {Promise<void>} resolves when the stream has ended
+ */
+async function measureSelect(connection, tableName, selectLimit, parser, extractFunction) {
+  const queryTimeLabel = parser + 'SelectTime';
+  const streamResult = true;
+  let totalBlock = 0;
+  let minBlock = Infinity;
+  let maxBlock = 0;
+  let blockCount = 0;
+  const monitor = blocked((time) => {
+    blockCount++;
+    totalBlock += time;
+    minBlock = Math.min(minBlock, time);
+    maxBlock = Math.max(maxBlock, time);
+  });
+
+  console.time(queryTimeLabel);
+  try {
+    await new Promise((resolve, reject) => {
+      connection.execute({
+        streamResult: streamResult,
+        sqlText: `select *
+          from IDENTIFIER(?) LIMIT ${selectLimit}`,
+        binds: [tableName],
+        complete: function (err, stmt) {
+          if (err) {
+            reject(err);
+            return;
+          }
+          const stream = stmt.streamRows();
+          stream.on('end', resolve);
+          stream.on('error', reject);
+          extractFunction(stream);
+        }
+      });
+    });
+  } finally {
+    // Stop the monitor so its hooks do not pile up across test cases.
+    monitor.stop();
+  }
+
+  console.log('parser: ' + parser);
+  console.log('streamResult: ' + streamResult);
+  console.log('row count: ' + selectLimit);
+  console.timeEnd(queryTimeLabel);
+  // Without any recorded block the average would be NaN and the minimum Infinity.
+  console.log('average block time: ' + (blockCount > 0 ? totalBlock / blockCount : 0));
+  console.log('minimum block time: ' + (blockCount > 0 ? minBlock : 0));
+  console.log('maximum block time: ' + maxBlock);
+  console.log('block call count: ' + blockCount);
+}
+
+/**
+ * Drops the tables of one test and closes the connection. Failures are logged
+ * instead of thrown so they cannot hide the error of the test itself.
+ *
+ * @param {Object} connection - connection used by the test
+ * @param {string} tableName - variant table created for the test
+ * @returns {Promise<void>}
+ */
+async function cleanUp(connection, tableName) {
+  try {
+    await helpers.executeQuery(connection, `drop table if exists ${tableName}`);
+    await helpers.executeQuery(connection, `drop table if exists ${TEMP_TABLE_NAME}`);
+  } catch (err) {
+    console.error('Unable to drop test tables: ' + err.message);
+  }
+  try {
+    await helpers.destroyConnection(connection);
+  } catch (err) {
+    console.error('Unable to close connection: ' + err.message);
+  }
+}
+
+/**
+ * Runs one measurement: prepares the tables for the given parser, streams the
+ * rows with `extractFunction` and cleans up afterwards.
+ *
+ * @param {Object} testCase - entry returned by buildTestCases
+ * @param {Function} extractFunction - extractOnData or extractOnStream
+ * @param {Object} options - `rowCount` to insert and `selectLimit` to fetch
+ * @returns {Promise<void>}
+ */
+async function execute({ parser, jsonColumnVariantParser }, extractFunction, { rowCount, selectLimit }) {
+  console.log(`\nTest for parser: [${parser}] extracting by ${extractFunction.name}`);
+  const testVariantTableName = `testVariantTable000${parser}`;
+  snowflake.configure({ jsonColumnVariantParser });
+  const connection = await helpers.connectUsingEnv();
+  try {
+    await helpers.executeQuery(connection, createTempTableSql(rowCount));
+    await helpers.executeQuery(connection, `create or replace table ${testVariantTableName}(colA variant)`);
+    await helpers.executeQuery(connection, `insert into ${testVariantTableName}
+      select parse_json(value)
+      from ${TEMP_TABLE_NAME}`);
+    await helpers.executeQuery(connection, `select count(colA) from ${testVariantTableName}`);
+    await measureSelect(connection, testVariantTableName, selectLimit, parser, extractFunction);
+  } finally {
+    await cleanUp(connection, testVariantTableName);
+  }
+}
+
+/**
+ * Entry point: reads the command line arguments and runs every selected
+ * parser with both extraction methods.
+ *
+ * @returns {Promise<void>}
+ */
+async function run() {
+  const [rowCountToInsert, rowCountToFetch, chosenParser] = process.argv.slice(2);
+  console.log('Started with arguments: ');
+  console.log(`Inserted rows amount: ${rowCountToInsert} - default ${DEFAULT_ROW_COUNT}`);
+  console.log(`Selected rows amount: ${rowCountToFetch} - default ${DEFAULT_ROW_COUNT}`);
+  console.log(`Selected json parse : ${chosenParser} - default all of Function, vm, better-eval, JSON`);
+
+  const options = {
+    rowCount: parseRowCount(rowCountToInsert, 'Inserted rows amount'),
+    selectLimit: parseRowCount(rowCountToFetch, 'Selected rows amount')
+  };
+  const testCases = buildTestCases(chosenParser);
+  if (testCases.length === 0) {
+    console.log(`No parser matches: ${chosenParser}`);
+  }
+
+  // One test at a time: the parser is configured on the snowflake module, not
+  // per connection, and concurrent runs would distort the timings.
+  for (const testCase of testCases) {
+    await execute(testCase, extractOnData, options);
+    await execute(testCase, extractOnStream, options);
+  }
+}
+
+run().catch((err) => {
+  console.error(err);
+  process.exitCode = 1;
+});
diff --git a/samples/package.json b/samples/package.json
new file mode 100644
index 000000000..891a6a75a
--- /dev/null
+++ b/samples/package.json
@@ -0,0 +1,13 @@
+{
+  "name": "sample",
+  "version": "0.0.1",
+  "description": "Snowflake Node.js driver samples",
+  "dependencies": {
+    "better-eval": "^1.3.0",
+    "blocked-at": "^1.2.0",
+    "snowflake-sdk": "^1.8.0"
+  },
+  "scripts": {
+    "jsonParserComparison": "node jsonParserComparison.js"
+  }
+}