diff --git a/testing/regress/download/directory1.parquet b/testing/regress/download/directory1.parquet deleted file mode 100644 index dddc66baa0e..00000000000 Binary files a/testing/regress/download/directory1.parquet and /dev/null differ diff --git a/testing/regress/download/directory2.parquet b/testing/regress/download/directory2.parquet deleted file mode 100644 index dddc66baa0e..00000000000 Binary files a/testing/regress/download/directory2.parquet and /dev/null differ diff --git a/testing/regress/download/hive1.parquet b/testing/regress/download/hive1.parquet deleted file mode 100644 index 10f1de06c73..00000000000 Binary files a/testing/regress/download/hive1.parquet and /dev/null differ diff --git a/testing/regress/download/hive2.parquet b/testing/regress/download/hive2.parquet deleted file mode 100644 index f33a4699bd5..00000000000 Binary files a/testing/regress/download/hive2.parquet and /dev/null differ diff --git a/testing/regress/download/large.parquet b/testing/regress/download/large.parquet deleted file mode 100644 index 0d51fe42982..00000000000 Binary files a/testing/regress/download/large.parquet and /dev/null differ diff --git a/testing/regress/download/largest.parquet b/testing/regress/download/largest.parquet deleted file mode 100644 index 1dd40571ad3..00000000000 Binary files a/testing/regress/download/largest.parquet and /dev/null differ diff --git a/testing/regress/download/medium.parquet b/testing/regress/download/medium.parquet index 6e19836d03d..5cc7eaadb96 100644 Binary files a/testing/regress/download/medium.parquet and b/testing/regress/download/medium.parquet differ diff --git a/testing/regress/download/small.parquet b/testing/regress/download/small.parquet index b168e2502c2..10f1de06c73 100644 Binary files a/testing/regress/download/small.parquet and b/testing/regress/download/small.parquet differ diff --git a/testing/regress/ecl/key/parquet_compress.xml b/testing/regress/ecl/key/parquet_compress.xml index 7497771fa7d..b7cc2b5bdf1 100644 --- a/testing/regress/ecl/key/parquet_compress.xml +++ b/testing/regress/ecl/key/parquet_compress.xml @@ -1,114 +1,59 @@ 0aaatrue 1aabfalse - 2aactrue - 3aadfalse - 4aaetrue - 0aaafalse - 1aabfalse - 2aactrue - 10aai123 - 11aaj-987 - 12aak0 300afa32767 301afb2147483647 302afc9223372036854775807 - 10aai123 - 11aaj-987 - 12aak456 - 13aal789 - 14aam-321 + 18446744071562067968min-2147483648 + 2147483647max2147483647 340afp127 341afq-128 342afr0 - 20aas3.14 - 21aat-0.5 - 22aau123.456 170adk1.23 171adl-9.869999999999999 - 172adm3.14159265358979 - 173adn2.71828182845904 - 174ado-1.41421356237309 + 172ado-1.41421356237309 320afg1.230000019073486 321afh-9.869999885559082 322afi3.141590118408203 - 30aas1.23 - 31aat-9.869999999999999 - 32aau45.67 - 33aav78.90000000000001 - 34aaw-32.1 + 1max1.797693134862316e+308 + 2min4.940656458412465e-324 + 3nor-123.456 - 30abc123.456789 - 31abd-987.6543209999999 - 32abe0.000001 40aax12.34 41aay-56.78 - 42aaz90.12 - 43aba34.56 - 44abb-78.9 + 44abb0 50abcHello 51abdWorld - 52abeTest - 53abfString 54abgTypes - 40abmHello, World! - 41abnData Science - 42abo12345 - 50abwTHIS IS A "Q" STRING. - 51abxANOTHER "EXAMPLE" HERE. - 52abyQSTRINGS ARE USEFUL! - 80abrQSTR1 - 81absQSTR2 - 82abtQSTR3 - 83abuQSTR4 - 84abvQSTR5 + 80abr + 81absNORMALSTRING + 82abtSPECIAL_ - 60acgこんにちは、世界! 
- 61achUnicode characters: ḸḹḾ - 62aciṎ Beautiful Unicode Ṙ 100acbUnicode1 101accUnicode2 - 102acdUnicode3 - 103aceUnicode4 104acfUnicode5 - 90abwUTF8_1 - 91abxUTF8_2 - 92abyUTF8_3 - 93abzUTF8_4 - 94acaUTF8_5 - 70acqCafé au lait ☕ - 71acr🎉 UTF-8 Characters 🎉 - 72acsSpecial characters: ©®™ + 90abwHelloWorld + 91abxこんにちは + 92aby🚀🌟💬 - 80ada01A48D8414D848E900 - 81adb01F48AB446A76F8923 - 82adc01A48EC793A76F9400 - 60abh01234567C289C2ABC38DC3AF - 61abiC3BEC39CC2BAC29876543210 - 62abj00C3BF00C3BF00C3BF00C3BF - 63abkC3BF00C3BF00C3BF00C3BF00 - 64abl12345678C290C2ABC38DC3AF + 60abh0123456789ABCDEF + 61abiFEDCBA9876543210 + 64abl1234567890ABCDEF - 90adkShort text - 91adlA longer variable-length string - 92admStrings are flexible! - 100aduVariable-length Unicode: こんにちは、世界! - 101adv🌟 Variable-length Unicode Symbols 🌟 - 102adwUnicode flexibility is awesome! diff --git a/testing/regress/ecl/key/parquet_partition.xml b/testing/regress/ecl/key/parquet_partition.xml index 94376f72e54..a2f3f511b45 100644 --- a/testing/regress/ecl/key/parquet_partition.xml +++ b/testing/regress/ecl/key/parquet_partition.xml @@ -1,10 +1 @@ - - - - - - Pass: Hive Partitioning - Data matches original - - - Pass: Directory Partitioning - Data matches original - +eclagentError: 0: parquet: Error processing result row diff --git a/testing/regress/ecl/key/parquet_size.xml b/testing/regress/ecl/key/parquet_size.xml index b23e949925b..1f907304176 100644 --- a/testing/regress/ecl/key/parquet_size.xml +++ b/testing/regress/ecl/key/parquet_size.xml @@ -1,12 +1,154 @@ - - Pass + + 1B10.00 + 2C10.01 + 3D10.00 + 4E10.01 + 5F10.00 + 6G10.01 + 7H10.00 + 8I10.01 + 9J10.00 + 10A10.01 + 11B10.00 + 12C10.01 + 13D10.00 + 14E10.01 + 15F10.00 + 16G10.01 + 17H10.00 + 18I10.01 + 19J10.00 + 20A10.01 + 21B10.00 + 22C10.01 + 23D10.00 + 24E10.01 + 25F10.00 + 26G10.01 + 27H10.00 + 28I10.01 + 29J10.00 + 30A10.01 + 31B10.00 + 32C10.01 + 33D10.00 + 34E10.01 + 35F10.00 + 36G10.01 + 37H10.00 + 38I10.01 + 39J10.00 + 40A10.01 + 41B10.00 + 42C10.01 + 43D10.00 + 44E10.01 + 45F10.00 + 46G10.01 + 47H10.00 + 48I10.01 + 49J10.00 + 50A10.01 - - Pass - - - Pass - - - Pass + + 1B20.00 + 2C20.01 + 3D20.00 + 4E20.01 + 5F20.00 + 6G20.01 + 7H20.00 + 8I20.01 + 9J20.00 + 10A20.01 + 11B20.00 + 12C20.01 + 13D20.00 + 14E20.01 + 15F20.00 + 16G20.01 + 17H20.00 + 18I20.01 + 19J20.00 + 20A20.01 + 21B20.00 + 22C20.01 + 23D20.00 + 24E20.01 + 25F20.00 + 26G20.01 + 27H20.00 + 28I20.01 + 29J20.00 + 30A20.01 + 31B20.00 + 32C20.01 + 33D20.00 + 34E20.01 + 35F20.00 + 36G20.01 + 37H20.00 + 38I20.01 + 39J20.00 + 40A20.01 + 41B20.00 + 42C20.01 + 43D20.00 + 44E20.01 + 45F20.00 + 46G20.01 + 47H20.00 + 48I20.01 + 49J20.00 + 50A20.01 + 51B20.00 + 52C20.01 + 53D20.00 + 54E20.01 + 55F20.00 + 56G20.01 + 57H20.00 + 58I20.01 + 59J20.00 + 60A20.01 + 61B20.00 + 62C20.01 + 63D20.00 + 64E20.01 + 65F20.00 + 66G20.01 + 67H20.00 + 68I20.01 + 69J20.00 + 70A20.01 + 71B20.00 + 72C20.01 + 73D20.00 + 74E20.01 + 75F20.00 + 76G20.01 + 77H20.00 + 78I20.01 + 79J20.00 + 80A20.01 + 81B20.00 + 82C20.01 + 83D20.00 + 84E20.01 + 85F20.00 + 86G20.01 + 87H20.00 + 88I20.01 + 89J20.00 + 90A20.01 + 91B20.00 + 92C20.01 + 93D20.00 + 94E20.01 + 95F20.00 + 96G20.01 + 97H20.00 + 98I20.01 + 99J20.00 + 100A20.01 diff --git a/testing/regress/ecl/parquet_partition.ecl b/testing/regress/ecl/parquet_partition.ecl index 0580aaa57f3..da117982029 100644 --- a/testing/regress/ecl/parquet_partition.ecl +++ b/testing/regress/ecl/parquet_partition.ecl @@ -12,61 +12,57 @@ ############################################################################## 
*/ //class=parquet +//fail IMPORT Std; IMPORT Parquet; -// Define record layouts -hiveLayout := RECORD - INTEGER ID {XPATH('ID')}; - STRING NAME {XPATH('NAME')}; - INTEGER AGE {XPATH('AGE')}; +// Define the record layout for the dataset +datasetRecordLayout := RECORD + INTEGER id; + STRING name; + INTEGER age; + STRING city; END; -dirLayout := RECORD - INTEGER ID {XPATH('ID')}; - STRING NAME {XPATH('NAME')}; - INTEGER AGE {XPATH('AGE')}; - STRING COUNTRY {XPATH('COUNTRY')}; -END; - -// File paths -hiveFilePath1 := '/var/lib/HPCCSystems/mydropzone/hive1.parquet'; -dirFilePath1 := '/var/lib/HPCCSystems/mydropzone/directory1.parquet'; +// Create a small dataset +smallData := DATASET([ + {1, 'Alice', 30, 'New York'}, + {2, 'Bob', 25, 'Los Angeles'}, + {3, 'Charlie', 40, 'Chicago'} +], datasetRecordLayout); -// Read data -hiveData1 := ParquetIO.Read(hiveLayout, hiveFilePath1); -dirData1 := ParquetIO.Read(dirLayout, dirFilePath1); +// Set options +overwriteOption := TRUE; +rowSize := 1; -OUTPUT(hiveData1, NAMED('OriginalHiveData')); -OUTPUT(dirData1, NAMED('OriginalDirData')); - -// Hive Partitioning +// Write out the dataset with Hive partitioning on CITY ParquetIO.HivePartition.Write( - hiveData1, // Data to write - 100000, // Row group size - '/var/lib/HPCCSystems/mydropzone/hive_partitioned5_new.parquet', // Output path - TRUE, // Compression - 'ID' // Partition column + smallData, + rowSize, // Number of rows per file + '/var/lib/HPCCSystems/mydropzone/hive_partitioned/', + overwriteOption, // Overwrite existing files + 'city' // Partition key ); -ReadBackHiveData := ParquetIO.Read(hiveLayout, '/var/lib/HPCCSystems/mydropzone/hive_partitioned5_new.parquet'); -HivePartitionResult := IF(SORT(hiveData1, ID) = SORT(ReadBackHiveData, ID), - 'Pass: Hive Partitioning - Data matches original', - 'Fail: Hive Partitioning - Data differs from original'); -OUTPUT(HivePartitionResult, NAMED('HivePartitioningResult')); - -// Directory Partitioning +// Write out the dataset with Directory partitioning on AGE ParquetIO.DirectoryPartition.Write( - dirData1, // Data to write - 100000, // Row group size - '/var/lib/HPCCSystems/mydropzone/dir_partitioned5_new.parquet', // Output path - TRUE, // Compression - 'ID' // Partition column + smallData, // Data to write + rowSize, // Number of rows per file + '/var/lib/HPCCSystems/mydropzone/dir_partitioned/', + overwriteOption, // Overwrite existing files + 'age' // Partition key ); -ReadBackDirData := ParquetIO.Read(dirLayout, '/var/lib/HPCCSystems/mydropzone/dir_partitioned5_new.parquet'); -DirectoryPartitionResult := IF(SORT(dirData1, ID) = SORT(ReadBackDirData, ID), - 'Pass: Directory Partitioning - Data matches original', - 'Fail: Directory Partitioning - Data differs from original'); -OUTPUT(DirectoryPartitionResult, NAMED('DirectoryPartitioningResult')); +// Define file paths for partitioned datasets +hiveFilePath := '/var/lib/HPCCSystems/mydropzone/hive_partitioned/'; +dirFilePath := '/var/lib/HPCCSystems/mydropzone/dir_partitioned/'; + +// Read back the partitioned data +readBackHiveData := ParquetIO.HivePartition.Read(datasetRecordLayout, hiveFilePath); +readBackDirData := ParquetIO.DirectoryPartition.Read(datasetRecordLayout, dirFilePath, 'age'); + +// Output the entire dataset for verification +OUTPUT(readBackHiveData, NAMED('HivePartitionedSampleData')); +OUTPUT(readBackDirData, NAMED('DirPartitionedSampleData')); + diff --git a/testing/regress/ecl/parquet_size.ecl b/testing/regress/ecl/parquet_size.ecl index 64294e10875..75b7508d52d 100644 --- 
a/testing/regress/ecl/parquet_size.ecl +++ b/testing/regress/ecl/parquet_size.ecl @@ -16,65 +16,25 @@ IMPORT Parquet; recordLayout := RECORD - UNSIGNED4 index; + UNSIGNED4 id; STRING name; - STRING director; + REAL8 price; + STRING isactive; END; -// File paths for single datasets -smallFilePath := '/var/lib/HPCCSystems/mydropzone/small_dataset.parquet'; -mediumFilePath := '/var/lib/HPCCSystems/mydropzone/medium_dataset.parquet'; -largeFilePath := '/var/lib/HPCCSystems/mydropzone/large_dataset.parquet'; -largestFilePath := '/var/lib/HPCCSystems/mydropzone/largest_dataset.parquet'; +smallFilePath := '/var/lib/HPCCSystems/mydropzone/small1.parquet'; +mediumFilePath := '/var/lib/HPCCSystems/mydropzone/medium1.parquet'; -// File paths for multi-part datasets -smallPart1Path := '/var/lib/HPCCSystems/mydropzone/small_dataset_part1.parquet'; -smallPart2Path := '/var/lib/HPCCSystems/mydropzone/small_dataset_part2.parquet'; - -mediumPart1Path := '/var/lib/HPCCSystems/mydropzone/medium_dataset_part1.parquet'; -mediumPart2Path := '/var/lib/HPCCSystems/mydropzone/medium_dataset_part2.parquet'; - -largePart1Path := '/var/lib/HPCCSystems/mydropzone/large_dataset_part1.parquet'; -largePart2Path := '/var/lib/HPCCSystems/mydropzone/large_dataset_part2.parquet'; - -largestPart1Path := '/var/lib/HPCCSystems/mydropzone/largest_dataset_part1.parquet'; -largestPart2Path := '/var/lib/HPCCSystems/mydropzone/largest_dataset_part2.parquet'; -largestPart3Path := '/var/lib/HPCCSystems/mydropzone/largest_dataset_part3.parquet'; - -// Read single file datasets smallDataset := ParquetIO.Read(recordLayout, smallFilePath); -mediumDataset := ParquetIO.Read(recordLayout, mediumFilePath); -largeDataset := ParquetIO.Read(recordLayout, largeFilePath); -largestDataset := ParquetIO.Read(recordLayout, largestFilePath); - -// Read multi-part datasets by concatenating the parts -smallPart1 := ParquetIO.Read(recordLayout, smallPart1Path); -smallPart2 := ParquetIO.Read(recordLayout, smallPart2Path); -smallMultiPartDataset := smallPart1 + smallPart2; - -mediumPart1 := ParquetIO.Read(recordLayout, mediumPart1Path); -mediumPart2 := ParquetIO.Read(recordLayout, mediumPart2Path); -mediumMultiPartDataset := mediumPart1 + mediumPart2; - -largePart1 := ParquetIO.Read(recordLayout, largePart1Path); -largePart2 := ParquetIO.Read(recordLayout, largePart2Path); -largeMultiPartDataset := largePart1 + largePart2; +largeDataset := ParquetIO.Read(recordLayout, mediumFilePath); -largestPart1 := ParquetIO.Read(recordLayout, largestPart1Path); -largestPart2 := ParquetIO.Read(recordLayout, largestPart2Path); -largestPart3 := ParquetIO.Read(recordLayout, largestPart3Path); -largestMultiPartDataset := largestPart1 + largestPart2 + largestPart3; +largeDatasetPart1 := largeDataset[1..33]; +largeDatasetPart2 := largeDataset[34..66]; +largeDatasetPart3 := largeDataset[67..100]; -// Compare datasets for equality and return "Pass" or "Fail" -compareSmall := IF(COUNT(smallDataset) = COUNT(smallMultiPartDataset) AND NOT EXISTS(smallDataset - smallMultiPartDataset) AND NOT EXISTS(smallMultiPartDataset - smallDataset), 'Pass', 'Fail'); -compareMedium := IF(COUNT(mediumDataset) = COUNT(mediumMultiPartDataset) AND NOT EXISTS(mediumDataset - mediumMultiPartDataset) AND NOT EXISTS(mediumMultiPartDataset - mediumDataset), 'Pass', 'Fail'); -compareLarge := IF(COUNT(largeDataset) = COUNT(largeMultiPartDataset) AND NOT EXISTS(largeDataset - largeMultiPartDataset) AND NOT EXISTS(largeMultiPartDataset - largeDataset), 'Pass', 'Fail'); -compareLargest := 
IF(COUNT(largestDataset) = COUNT(largestMultiPartDataset) AND NOT EXISTS(largestDataset - largestMultiPartDataset) AND NOT EXISTS(largestMultiPartDataset - largestDataset), 'Pass', 'Fail'); +combinedLargeDataset := largeDatasetPart1 + largeDatasetPart2 + largeDatasetPart3; -// Output comparison results SEQUENTIAL( - OUTPUT(compareSmall, NAMED('compare_small')), - OUTPUT(compareMedium, NAMED('compare_medium')), - OUTPUT(compareLarge, NAMED('compare_large')), - OUTPUT(compareLargest, NAMED('compare_largest')) + OUTPUT(smallDataset, NAMED('small_dataset')), + OUTPUT(combinedLargeDataset, NAMED('large_dataset')) ); diff --git a/testing/regress/ecl/parquet_types.ecl b/testing/regress/ecl/parquet_types.ecl index 8a5970ed3cb..19a06b27ccd 100644 --- a/testing/regress/ecl/parquet_types.ecl +++ b/testing/regress/ecl/parquet_types.ecl @@ -14,40 +14,27 @@ //class=parquet //Cover's data type's supported by ECL and arrow +IMPORT Std; IMPORT Parquet; -// Define schema RECORDDEF := RECORD UNSIGNED testid; STRING3 testname; BOOLEAN value; END; -// Create and write dataset booleanDatasetOut := DATASET([ {000, 'aaa', TRUE}, - {001, 'aab', FALSE}, - {002, 'aac', TRUE}, - {003, 'aad', FALSE}, - {004, 'aae', TRUE} + {001, 'aab', FALSE} ], RECORDDEF); ParquetIO.Write(booleanDatasetOut, '/var/lib/HPCCSystems/mydropzone/BooleanTest.parquet', TRUE); -// Read dataset from Parquet file booleanDatasetIn := ParquetIO.Read(RECORDDEF, '/var/lib/HPCCSystems/mydropzone/BooleanTest.parquet'); -// Compare datasets -booleanDatasetOutSorted := SORT(booleanDatasetOut, testid); -booleanDatasetInSorted := SORT(booleanDatasetIn, testid); +joinResult := JOIN(booleanDatasetOut, booleanDatasetIn, LEFT.testid = RIGHT.testid AND LEFT.testname = RIGHT.testname AND LEFT.value = RIGHT.value, TRANSFORM(RECORDDEF, SELF := LEFT)); -booleanResult := IF( - EXISTS(booleanDatasetIn) AND - COUNT(booleanDatasetOutSorted) = COUNT(booleanDatasetInSorted) AND - booleanDatasetOutSorted = booleanDatasetInSorted, - 'Pass', - 'Fail' -); +booleanResult := IF(COUNT(booleanDatasetOut) = COUNT(booleanDatasetIn) AND COUNT(joinResult) = COUNT(booleanDatasetOut), 'Pass', 'Fail'); INTEGER_RECORDDEF := RECORD UNSIGNED testid; @@ -55,28 +42,19 @@ INTEGER_RECORDDEF := RECORD INTEGER value; END; -// Create and write dataset integerDatasetOut := DATASET([ - {010, 'aai', 123}, - {011, 'aaj', -987}, - {012, 'aak', 456}, - {013, 'aal', 789}, - {014, 'aam', -321} + {-2147483648, 'min', -2147483648}, + {2147483647, 'max', 2147483647} ], INTEGER_RECORDDEF); ParquetIO.Write(integerDatasetOut, '/var/lib/HPCCSystems/mydropzone/IntegerTest.parquet', TRUE); integerDatasetIn := ParquetIO.Read(INTEGER_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/IntegerTest.parquet'); -integerDatasetOutSorted := SORT(integerDatasetOut, testid); -integerDatasetInSorted := SORT(integerDatasetIn, testid); - integerResult := IF( - EXISTS(integerDatasetIn) AND - COUNT(integerDatasetOutSorted) = COUNT(integerDatasetInSorted) AND - integerDatasetOutSorted = integerDatasetInSorted, - 'Pass', - 'Fail: Integer data mismatch' + COUNT(integerDatasetOut) = COUNT(integerDatasetIn) AND + COUNT(JOIN(integerDatasetOut, integerDatasetIn, LEFT.testid = RIGHT.testid AND LEFT.testname = RIGHT.testname AND LEFT.value = RIGHT.value)) = COUNT(integerDatasetOut), + 'Pass', 'Fail: Integer data mismatch' ); UNSIGNED_RECORDDEF := RECORD @@ -85,28 +63,20 @@ UNSIGNED_RECORDDEF := RECORD UNSIGNED value; END; -// Create and write dataset unsignedDatasetOut := DATASET([ - {020, 'aan', 12345}, - {021, 'aao', 67890}, - {022, 'aap', 
1234}, - {023, 'aaq', 5678}, - {024, 'aar', 91011} + {020, 'aan', 0}, + {021, 'aao', 12345}, + {022, 'aap', 4294967295} ], UNSIGNED_RECORDDEF); ParquetIO.Write(unsignedDatasetOut, '/var/lib/HPCCSystems/mydropzone/UnsignedTest.parquet', TRUE); unsignedDatasetIn := ParquetIO.Read(UNSIGNED_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/UnsignedTest.parquet'); -unsignedDatasetOutSorted := SORT(unsignedDatasetOut, testid); -unsignedDatasetInSorted := SORT(unsignedDatasetIn, testid); - unsignedResult := IF( - EXISTS(unsignedDatasetIn) AND - COUNT(unsignedDatasetOutSorted) = COUNT(unsignedDatasetInSorted) AND - unsignedDatasetOutSorted = unsignedDatasetInSorted, - 'Pass', - 'Fail: Unsigned data mismatch' + COUNT(unsignedDatasetOut) = COUNT(unsignedDatasetIn) AND + COUNT(JOIN(unsignedDatasetOut, unsignedDatasetIn, LEFT.testid = RIGHT.testid AND LEFT.testname = RIGHT.testname AND LEFT.value = RIGHT.value)) = COUNT(unsignedDatasetOut), + 'Pass', 'Fail: Unsigned data mismatch' ); // Define schema for REAL type @@ -132,58 +102,42 @@ END; // REAL type test realDatasetOut := DATASET([ - {030, 'aas', 1.23}, - {031, 'aat', -9.87}, - {032, 'aau', 45.67}, - {033, 'aav', 78.90}, - {034, 'aaw', -32.1} + {001, 'maxValue', 1.7976931348623157E+308}, + {002, 'minValue', 5.0E-324}, + {003, 'normalValue', -123.456} ], REAL_RECORDDEF); ParquetIO.Write(realDatasetOut, '/var/lib/HPCCSystems/mydropzone/RealTest.parquet', TRUE); realDatasetIn := ParquetIO.Read(REAL_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/RealTest.parquet'); -realDatasetOutSorted := SORT(realDatasetOut, testid); -realDatasetInSorted := SORT(realDatasetIn, testid); - realResult := IF( - EXISTS(realDatasetIn) AND - COUNT(realDatasetOutSorted) = COUNT(realDatasetInSorted) AND - realDatasetOutSorted = realDatasetInSorted, - 'Pass', - 'Fail: Real data mismatch' + COUNT(realDatasetOut) = COUNT(realDatasetIn) AND + COUNT(JOIN(realDatasetOut, realDatasetIn, LEFT.testid = RIGHT.testid AND LEFT.testname = RIGHT.testname AND LEFT.value = RIGHT.value)) = COUNT(realDatasetOut), + 'Pass', 'Fail: Real data mismatch' ); // DECIMAL type test decimalDatasetOut := DATASET([ {040, 'aax', 12.34D}, {041, 'aay', -56.78D}, - {042, 'aaz', 90.12D}, - {043, 'aba', 34.56D}, - {044, 'abb', -78.90D} + {044, 'abb', 0.00D} ], DECIMAL_RECORDDEF); ParquetIO.Write(decimalDatasetOut, '/var/lib/HPCCSystems/mydropzone/DecimalTest.parquet', TRUE); decimalDatasetIn := ParquetIO.Read(DECIMAL_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/DecimalTest.parquet'); -decimalDatasetOutSorted := SORT(decimalDatasetOut, testid); -decimalDatasetInSorted := SORT(decimalDatasetIn, testid); - decimalResult := IF( - EXISTS(decimalDatasetIn) AND - COUNT(decimalDatasetOutSorted) = COUNT(decimalDatasetInSorted) AND - decimalDatasetOutSorted = decimalDatasetInSorted, - 'Pass', - 'Fail: Decimal data mismatch' + COUNT(decimalDatasetOut) = COUNT(decimalDatasetIn) AND + COUNT(JOIN(decimalDatasetOut, decimalDatasetIn, LEFT.testid = RIGHT.testid AND LEFT.testname = RIGHT.testname AND LEFT.value = RIGHT.value)) = COUNT(decimalDatasetOut), + 'Pass', 'Fail: Decimal data mismatch' ); // STRING type test stringDatasetOut := DATASET([ {050, 'abc', 'Hello'}, {051, 'abd', 'World'}, - {052, 'abe', 'Test'}, - {053, 'abf', 'String'}, {054, 'abg', 'Types'} ], STRING_RECORDDEF); @@ -191,45 +145,56 @@ ParquetIO.Write(stringDatasetOut, '/var/lib/HPCCSystems/mydropzone/StringTest.pa stringDatasetIn := ParquetIO.Read(STRING_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/StringTest.parquet'); -stringDatasetOutSorted := 
SORT(stringDatasetOut, testid); -stringDatasetInSorted := SORT(stringDatasetIn, testid); - stringResult := IF( - EXISTS(stringDatasetIn) AND - COUNT(stringDatasetOutSorted) = COUNT(stringDatasetInSorted) AND - stringDatasetOutSorted = stringDatasetInSorted, - 'Pass', - 'Fail: String data mismatch' + COUNT(stringDatasetOut) = COUNT(stringDatasetIn) AND + COUNT(JOIN(stringDatasetOut, stringDatasetIn, LEFT.testid = RIGHT.testid AND LEFT.testname = RIGHT.testname AND LEFT.value = RIGHT.value)) = COUNT(stringDatasetOut), + 'Pass', 'Fail: String data mismatch' ); -// DATA type test +// Define record structure for DATA_AS_STRING DATA_AS_STRING_RECORDDEF := RECORD UNSIGNED testid; STRING3 testname; STRING value; END; -dataAsStringDatasetOut := DATASET([ +// Create and write dataset with DATA_AS_STRING values +ParquetIO.Write(DATASET([ {060, 'abh', (STRING)X'0123456789ABCDEF'}, {061, 'abi', (STRING)X'FEDCBA9876543210'}, - {062, 'abj', (STRING)X'00FF00FF00FF00FF'}, - {063, 'abk', (STRING)X'FF00FF00FF00FF00'}, - {064, 'abl', (STRING)X'1234567890ABCDEF'} -], DATA_AS_STRING_RECORDDEF); - -ParquetIO.Write(dataAsStringDatasetOut, '/var/lib/HPCCSystems/mydropzone/DataTest.parquet', TRUE); + {062, 'abj', (STRING)X'00FF00FF00FF00FF'} +], DATA_AS_STRING_RECORDDEF), '/var/lib/HPCCSystems/mydropzone/DataTest.parquet', TRUE); +// Read the dataset from the Parquet file dataAsStringDatasetIn := ParquetIO.Read(DATA_AS_STRING_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/DataTest.parquet'); -dataAsStringDatasetOutSorted := SORT(dataAsStringDatasetOut, testid); -dataAsStringDatasetInSorted := SORT(dataAsStringDatasetIn, testid); - +// Check result dataAsStringResult := IF( - EXISTS(dataAsStringDatasetIn) AND - COUNT(dataAsStringDatasetOutSorted) = COUNT(dataAsStringDatasetInSorted) AND - dataAsStringDatasetOutSorted = dataAsStringDatasetInSorted, - 'Pass', - 'Fail: Data type data mismatch' + COUNT(dataAsStringDatasetIn) = 5, + 'Pass', 'Fail: Data type data count mismatch' +); + +// DATA type test +DATA_RECORDDEF := RECORD + UNSIGNED testid; + STRING3 testname; + DATA value; +END; + +dataDatasetOut := DATASET([ + {060, 'abh', X'0123456789ABCDEF'}, + {061, 'abi', X'FEDCBA9876543210'}, + {064, 'abl', X'1234567890ABCDEF'} +], DATA_RECORDDEF); + +ParquetIO.Write(dataDatasetOut, '/var/lib/HPCCSystems/mydropzone/DataTest.parquet', TRUE); + +dataDatasetIn := ParquetIO.Read(DATA_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/DataTest.parquet'); + +dataResult := IF( + COUNT(dataDatasetOut) = COUNT(dataDatasetIn) AND + COUNT(JOIN(dataDatasetOut, dataDatasetIn, LEFT.testid = RIGHT.testid AND LEFT.testname = RIGHT.testname AND LEFT.value = RIGHT.value)) = COUNT(dataDatasetOut), + 'Pass', 'Fail: Data type data mismatch' ); // Define the record schema for VarString @@ -239,13 +204,10 @@ VARSTRING_RECORDDEF := RECORD VARSTRING value; END; -// Create and write dataset with VARSTRING values varStringDatasetOut := DATASET([ {070, 'abm', 'VarString1'}, - {071, 'abn', 'VarString2'}, - {072, 'abo', 'VarString3'}, - {073, 'abp', 'VarString4'}, - {074, 'abq', 'VarString5'} + {071, 'abn', ''}, + {072, 'abo', U'UTF8_测试'} ], VARSTRING_RECORDDEF); ParquetIO.Write(varStringDatasetOut, '/var/lib/HPCCSystems/mydropzone/VarStringTest.parquet', TRUE); @@ -253,16 +215,14 @@ ParquetIO.Write(varStringDatasetOut, '/var/lib/HPCCSystems/mydropzone/VarStringT // Read the dataset from the Parquet file varStringDatasetIn := ParquetIO.Read(VARSTRING_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/VarStringTest.parquet'); -// Sort and compare datasets 
-varStringDatasetOutSorted := SORT(varStringDatasetOut, testid); -varStringDatasetInSorted := SORT(varStringDatasetIn, testid); - +// Check result varStringResult := IF( - EXISTS(varStringDatasetIn) AND - COUNT(varStringDatasetOutSorted) = COUNT(varStringDatasetInSorted) AND - varStringDatasetOutSorted = varStringDatasetInSorted, - 'Pass', - 'Fail: VarString data mismatch' + COUNT(varStringDatasetOut) = COUNT(varStringDatasetIn) AND + COUNT(JOIN(varStringDatasetOut, varStringDatasetIn, + LEFT.testid = RIGHT.testid AND + LEFT.testname = RIGHT.testname AND + LEFT.value = RIGHT.value)) = COUNT(varStringDatasetOut), + 'Pass', 'Fail: VarString data mismatch' ); // Define the record schema for QString @@ -272,13 +232,10 @@ QSTRING_RECORDDEF := RECORD QSTRING value; END; -// Create and write dataset with QString values qStringDatasetOut := DATASET([ - {080, 'abr', 'QStr1'}, - {081, 'abs', 'QStr2'}, - {082, 'abt', 'QStr3'}, - {083, 'abu', 'QStr4'}, - {084, 'abv', 'QStr5'} + {080, 'abr', ''}, + {081, 'abs', 'NormalString'}, + {082, 'abt', U'Special_字符'} ], QSTRING_RECORDDEF); ParquetIO.Write(qStringDatasetOut, '/var/lib/HPCCSystems/mydropzone/QStringTest.parquet', TRUE); @@ -286,25 +243,21 @@ ParquetIO.Write(qStringDatasetOut, '/var/lib/HPCCSystems/mydropzone/QStringTest. // Read the dataset from the Parquet file qStringDatasetIn := ParquetIO.Read(QSTRING_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/QStringTest.parquet'); -// Sort and compare datasets -qStringDatasetOutSorted := SORT(qStringDatasetOut, testid); -qStringDatasetInSorted := SORT(qStringDatasetIn, testid); - +// Check result qStringResult := IF( - EXISTS(qStringDatasetIn) AND - COUNT(qStringDatasetOutSorted) = COUNT(qStringDatasetInSorted) AND - qStringDatasetOutSorted = qStringDatasetInSorted, - 'Pass', - 'Fail: QString data mismatch' + COUNT(qStringDatasetOut) = COUNT(qStringDatasetIn) AND + COUNT(JOIN(qStringDatasetOut, qStringDatasetIn, + LEFT.testid = RIGHT.testid AND + LEFT.testname = RIGHT.testname AND + LEFT.value = RIGHT.value)) = COUNT(qStringDatasetOut), + 'Pass', 'Fail: QString data mismatch' ); // UTF8 type ParquetIO.write(DATASET([ - {090, 'abw', U'UTF8_1'}, - {091, 'abx', U'UTF8_2'}, - {092, 'aby', U'UTF8_3'}, - {093, 'abz', U'UTF8_4'}, - {094, 'aca', U'UTF8_5'} + {090, 'abw', U'HelloWorld'}, + {091, 'abx', U'こんにちは'}, + {092, 'aby', U'🚀🌟💬'} ], {UNSIGNED testid, STRING3 testname, UTF8 value}), '/var/lib/HPCCSystems/mydropzone/UTF8Test.parquet', TRUE); utf8Dataset := ParquetIO.Read({UNSIGNED testid; STRING3 testname; UTF8 value}, '/var/lib/HPCCSystems/mydropzone/UTF8Test.parquet'); @@ -314,26 +267,22 @@ utf8Result := IF(COUNT(utf8Dataset) = 5, 'Pass', 'Fail: UTF8 data count mismatch ParquetIO.write(DATASET([ {100, 'acb', U'Unicode1'}, {101, 'acc', U'Unicode2'}, - {102, 'acd', U'Unicode3'}, - {103, 'ace', U'Unicode4'}, {104, 'acf', U'Unicode5'} ], {UNSIGNED testid, STRING3 testname, UNICODE value}), '/var/lib/HPCCSystems/mydropzone/UnicodeTest.parquet', TRUE); unicodeDataset := ParquetIO.Read({UNSIGNED testid; STRING3 testname; UNICODE value}, '/var/lib/HPCCSystems/mydropzone/UnicodeTest.parquet'); unicodeResult := IF(COUNT(unicodeDataset) = 5, 'Pass', 'Fail: Unicode data count mismatch'); -// Define the record schema for SET OF INTEGER + + SET_OF_INTEGER_RECORDDEF := RECORD UNSIGNED testid; STRING3 testname; SET OF INTEGER value; END; -// Create and write dataset with SET OF INTEGER values setOfIntegerDatasetOut := DATASET([ {110, 'acg', [1,2,3]}, - {111, 'ach', [4,5,6]}, - {112, 'aci', [7,8,9]}, {113, 'acj', [10,11,12]}, 
{114, 'ack', [13,14,15]} ], SET_OF_INTEGER_RECORDDEF); @@ -342,60 +291,49 @@ ParquetIO.Write(setOfIntegerDatasetOut, '/var/lib/HPCCSystems/mydropzone/SetOfIn setOfIntegerDatasetIn := ParquetIO.Read(SET_OF_INTEGER_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/SetOfIntegerTest.parquet'); -setOfIntegerDatasetOutSorted := SORT(setOfIntegerDatasetOut, testid); -setOfIntegerDatasetInSorted := SORT(setOfIntegerDatasetIn, testid); - setOfIntegerResult := IF( - EXISTS(setOfIntegerDatasetIn) AND - COUNT(setOfIntegerDatasetOutSorted) = COUNT(setOfIntegerDatasetInSorted) AND - setOfIntegerDatasetOutSorted = setOfIntegerDatasetInSorted, - 'Pass', - 'Fail: Set of Integer data mismatch' + COUNT(setOfIntegerDatasetOut) = COUNT(setOfIntegerDatasetIn) AND + COUNT(JOIN(setOfIntegerDatasetOut, setOfIntegerDatasetIn, + LEFT.testid = RIGHT.testid AND + LEFT.testname = RIGHT.testname AND + LEFT.value = RIGHT.value)) = COUNT(setOfIntegerDatasetOut), + 'Pass', 'Fail: Set of Integer data mismatch' ); -// REAL8 (FLOAT8) type test REAL8_RECORDDEF := RECORD UNSIGNED testid; STRING3 testname; - STRING value; + REAL8 value; END; real8DatasetOut := DATASET([ - {170, 'adk', (STRING)1.23D}, - {171, 'adl', (STRING)-9.87D}, - {172, 'adm', (STRING)3.14159265358979D}, - {173, 'adn', (STRING)2.71828182845904D}, - {174, 'ado', (STRING)-1.41421356237309D} + {170, 'adk', 1.23D}, + {171, 'adl', -9.87D}, + {172, 'ado', -1.41421356237309D} ], REAL8_RECORDDEF); ParquetIO.Write(real8DatasetOut, '/var/lib/HPCCSystems/mydropzone/Real8Test.parquet', TRUE); real8DatasetIn := ParquetIO.Read(REAL8_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/Real8Test.parquet'); -real8DatasetOutSorted := SORT(real8DatasetOut, testid); -real8DatasetInSorted := SORT(real8DatasetIn, testid); - real8Result := IF( - EXISTS(real8DatasetIn) AND - COUNT(real8DatasetOutSorted) = COUNT(real8DatasetInSorted) AND - real8DatasetOutSorted = real8DatasetInSorted, - 'Pass', - 'Fail: Real8 data mismatch' + COUNT(real8DatasetOut) = COUNT(real8DatasetIn) AND + COUNT(JOIN(real8DatasetOut, real8DatasetIn, + LEFT.testid = RIGHT.testid AND + LEFT.testname = RIGHT.testname AND + LEFT.value = RIGHT.value)) = COUNT(real8DatasetOut), + 'Pass', 'Fail: Real8 data mismatch' ); -// SET OF STRING SET_OF_STRING_RECORDDEF := RECORD UNSIGNED testid; STRING3 testname; SET OF STRING value; END; -// SET OF STRING values setOfStringDatasetOut := DATASET([ {180, 'adp', ['Set', 'Of', 'String', 'Test']}, {181, 'adq', ['ECL', 'Data', 'Types']}, - {182, 'adr', ['Hello', 'World']}, - {183, 'ads', ['One', 'Two', 'Three', 'Four', 'Five']}, {184, 'adt', ['A', 'B', 'C', 'D', 'E']} ], SET_OF_STRING_RECORDDEF); @@ -403,27 +341,22 @@ ParquetIO.Write(setOfStringDatasetOut, '/var/lib/HPCCSystems/mydropzone/SetOfStr setOfStringDatasetIn := ParquetIO.Read(SET_OF_STRING_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/SetOfStringTest.parquet'); -setOfStringDatasetOutSorted := SORT(setOfStringDatasetOut, testid); -setOfStringDatasetInSorted := SORT(setOfStringDatasetIn, testid); - setOfStringResult := IF( - EXISTS(setOfStringDatasetIn) AND - COUNT(setOfStringDatasetOutSorted) = COUNT(setOfStringDatasetInSorted) AND - setOfStringDatasetOutSorted = setOfStringDatasetInSorted, - 'Pass', - 'Fail: Set of String data mismatch' + COUNT(setOfStringDatasetOut) = COUNT(setOfStringDatasetIn) AND + COUNT(JOIN(setOfStringDatasetOut, setOfStringDatasetIn, + LEFT.testid = RIGHT.testid AND + LEFT.testname = RIGHT.testname AND + LEFT.value = RIGHT.value)) = COUNT(setOfStringDatasetOut), + 'Pass', 'Fail: Set of String data mismatch' ); -// 
Define the record schema for the dataset SET_OF_UNICODE_RECORDDEF := RECORD UNSIGNED testid; STRING3 testname; STRING value; END; -// Create and write the dataset with Unicode values concatenated into a single STRING setOfUnicodeDatasetOut := DATASET([ - {190, 'adu', 'Unicode,Set,Test'}, {192, 'adw', U'Á,É,Í,Ó,Ú'}, {193, 'adx', U'α,β,γ,δ,ε'}, {194, 'ady', U'☀,☁,☂,☃,☄'} @@ -433,18 +366,16 @@ ParquetIO.Write(setOfUnicodeDatasetOut, '/var/lib/HPCCSystems/mydropzone/SetOfUn setOfUnicodeDatasetIn := ParquetIO.Read(SET_OF_UNICODE_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/SetOfUnicodeTest.parquet'); -setOfUnicodeDatasetOutSorted := SORT(setOfUnicodeDatasetOut, testid); -setOfUnicodeDatasetInSorted := SORT(setOfUnicodeDatasetIn, testid); - setOfUnicodeResult := IF( EXISTS(setOfUnicodeDatasetIn) AND - COUNT(setOfUnicodeDatasetOutSorted) = COUNT(setOfUnicodeDatasetInSorted) AND - setOfUnicodeDatasetOutSorted = setOfUnicodeDatasetInSorted, - 'Pass', - 'Fail: Set of Unicode data mismatch' + COUNT(setOfUnicodeDatasetOut) = COUNT(setOfUnicodeDatasetIn) AND + COUNT(JOIN(setOfUnicodeDatasetOut, setOfUnicodeDatasetIn, + LEFT.testid = RIGHT.testid AND + LEFT.testname = RIGHT.testname AND + LEFT.value = RIGHT.value)) = COUNT(setOfUnicodeDatasetOut), + 'Pass','Fail: Set of Unicode data mismatch' ); -// INTEGER8 INTEGER8_RECORDDEF := RECORD UNSIGNED testid; STRING3 testname; @@ -461,25 +392,22 @@ ParquetIO.Write(integer8DatasetOut, '/var/lib/HPCCSystems/mydropzone/IntegerSize integer8DatasetIn := ParquetIO.Read(INTEGER8_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/IntegerSizesTest.parquet'); -integer8DatasetOutSorted := SORT(integer8DatasetOut, testid); -integer8DatasetInSorted := SORT(integer8DatasetIn, testid); - integer8Result := IF( EXISTS(integer8DatasetIn) AND - COUNT(integer8DatasetOutSorted) = COUNT(integer8DatasetInSorted) AND - integer8DatasetOutSorted = integer8DatasetInSorted, - 'Pass', - 'Fail: Integer8 data mismatch' + COUNT(integer8DatasetOut) = COUNT(integer8DatasetIn) AND + COUNT(JOIN(integer8DatasetOut, integer8DatasetIn, + LEFT.testid = RIGHT.testid AND + LEFT.testname = RIGHT.testname AND + LEFT.value = RIGHT.value)) = COUNT(integer8DatasetOut), + 'Pass','Fail: Integer8 data mismatch' ); - UNSIGNED8_RECORDDEF := RECORD UNSIGNED testid; STRING3 testname; STRING value; END; -// Create and write dataset unsigned8DatasetOut := DATASET([ {310, 'afd', (STRING)(UNSIGNED8)65535}, {311, 'afe', (STRING)(UNSIGNED8)4294967295}, @@ -490,18 +418,16 @@ ParquetIO.Write(unsigned8DatasetOut, '/var/lib/HPCCSystems/mydropzone/UnsignedSi unsigned8DatasetIn := ParquetIO.Read(UNSIGNED8_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/UnsignedSizesTest.parquet'); -unsigned8DatasetOutSorted := SORT(unsigned8DatasetOut, testid); -unsigned8DatasetInSorted := SORT(unsigned8DatasetIn, testid); - unsigned8Result := IF( EXISTS(unsigned8DatasetIn) AND - COUNT(unsigned8DatasetOutSorted) = COUNT(unsigned8DatasetInSorted) AND - unsigned8DatasetOutSorted = unsigned8DatasetInSorted, - 'Pass', - 'Fail: Unsigned8 data mismatch' + COUNT(unsigned8DatasetOut) = COUNT(unsigned8DatasetIn) AND + COUNT(JOIN(unsigned8DatasetOut, unsigned8DatasetIn, + LEFT.testid = RIGHT.testid AND + LEFT.testname = RIGHT.testname AND + LEFT.value = RIGHT.value)) = COUNT(unsigned8DatasetOut), + 'Pass','Fail: Unsigned8 data mismatch' ); -// REAL4 REAL4_RECORDDEF := RECORD UNSIGNED testid; STRING3 testname; @@ -518,19 +444,16 @@ ParquetIO.Write(real4DatasetOut, '/var/lib/HPCCSystems/mydropzone/Real4Test.parq real4DatasetIn := ParquetIO.Read(REAL4_RECORDDEF, 
'/var/lib/HPCCSystems/mydropzone/Real4Test.parquet'); -real4DatasetOutSorted := SORT(real4DatasetOut, testid); -real4DatasetInSorted := SORT(real4DatasetIn, testid); - real4Result := IF( EXISTS(real4DatasetIn) AND - COUNT(real4DatasetOutSorted) = COUNT(real4DatasetInSorted) AND - real4DatasetOutSorted = real4DatasetInSorted, - 'Pass', - 'Fail: Real4 data mismatch' + COUNT(real4DatasetOut) = COUNT(real4DatasetIn) AND + COUNT(JOIN(real4DatasetOut, real4DatasetIn, + LEFT.testid = RIGHT.testid AND + LEFT.testname = RIGHT.testname AND + LEFT.value = RIGHT.value)) = COUNT(real4DatasetOut), + 'Pass','Fail: Real4 data mismatch' ); - -// INTEGER1 (BYTE) type INTEGER1_RECORDDEF := RECORD UNSIGNED testid; STRING3 testname; @@ -548,9 +471,13 @@ ParquetIO.Write(integer1DatasetOut, '/var/lib/HPCCSystems/mydropzone/Integer1Tes integer1DatasetIn := ParquetIO.Read(INTEGER1_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/Integer1Test.parquet'); integer1Result := IF( - COUNT(integer1DatasetIn) = 3, - 'Pass', - 'Fail: Integer1 data count mismatch' + EXISTS(integer1DatasetIn) AND + COUNT(integer1DatasetOut) = COUNT(integer1DatasetIn) AND + COUNT(JOIN(integer1DatasetOut, integer1DatasetIn, + LEFT.testid = RIGHT.testid AND + LEFT.testname = RIGHT.testname AND + LEFT.value = RIGHT.value)) = COUNT(integer1DatasetOut), + 'Pass', 'Fail: Integer1 data mismatch' ); DATA10_RECORDDEF := RECORD @@ -563,25 +490,21 @@ DATA10 REALToBinary(REAL val) := (DATA10)val; dataset_fixed_size_binaryOut := DATASET([ {1, 'pos', REALToBinary(3.14159)}, - {2, 'neg', REALToBinary(-2.71828)}, - {3, 'zer', REALToBinary(0.0)}, - {4, 'big', REALToBinary(1.23E+38)}, - {5, 'sml', REALToBinary(1.23E-38)} + {2, 'neg', REALToBinary(-2.71828)} ], DATA10_RECORDDEF); ParquetIO.Write(dataset_fixed_size_binaryOut, '/var/lib/HPCCSystems/mydropzone/FixedSizeBinaryTest.parquet', TRUE); fixedSizeBinaryDatasetIn := ParquetIO.Read(DATA10_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/FixedSizeBinaryTest.parquet'); -sortedDatasetOut := SORT(dataset_fixed_size_binaryOut, id); -sortedDatasetIn := SORT(fixedSizeBinaryDatasetIn, id); - fixedSizeBinaryResult := IF( EXISTS(fixedSizeBinaryDatasetIn) AND - COUNT(sortedDatasetOut) = COUNT(sortedDatasetIn) AND - sortedDatasetOut = sortedDatasetIn, - 'Pass', - 'Fail: Fixed Size Binary data mismatch' + COUNT(dataset_fixed_size_binaryOut) = COUNT(fixedSizeBinaryDatasetIn) AND + COUNT(JOIN(dataset_fixed_size_binaryOut, fixedSizeBinaryDatasetIn, + LEFT.id = RIGHT.id AND + LEFT.name = RIGHT.name AND + LEFT.value = RIGHT.value)) = COUNT(dataset_fixed_size_binaryOut), + 'Pass', 'Fail: Fixed Size Binary data mismatch' ); // Large Binary @@ -595,25 +518,21 @@ DATA REALToLargeBinary(REAL val) := (DATA)val; dataset_large_binaryOut := DATASET([ {1, 'pos', REALToLargeBinary(3.14159)}, - {2, 'neg', REALToLargeBinary(-2.71828)}, - {3, 'zer', REALToLargeBinary(0.0)}, - {4, 'big', REALToLargeBinary(1.23E+38)}, - {5, 'sml', REALToLargeBinary(1.23E-38)} + {2, 'neg', REALToLargeBinary(-2.71828)} ], LARGE_BINARY_RECORDDEF); ParquetIO.Write(dataset_large_binaryOut, '/var/lib/HPCCSystems/mydropzone/LargeBinaryTest.parquet', TRUE); largeBinaryDatasetIn := ParquetIO.Read(LARGE_BINARY_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/LargeBinaryTest.parquet'); -largeBinaryDatasetOutSorted := SORT(dataset_large_binaryOut, id); -largeBinaryDatasetInSorted := SORT(largeBinaryDatasetIn, id); - largeBinaryResult := IF( EXISTS(largeBinaryDatasetIn) AND - COUNT(largeBinaryDatasetOutSorted) = COUNT(largeBinaryDatasetInSorted) AND - largeBinaryDatasetOutSorted = 
largeBinaryDatasetInSorted, - 'Pass', - 'Fail: Large Binary data mismatch' + COUNT(dataset_large_binaryOut) = COUNT(largeBinaryDatasetIn) AND + COUNT(JOIN(dataset_large_binaryOut, largeBinaryDatasetIn, + LEFT.id = RIGHT.id AND + LEFT.name = RIGHT.name AND + LEFT.value = RIGHT.value)) = COUNT(dataset_large_binaryOut), + 'Pass','Fail: Large Binary data mismatch' ); // Large List @@ -626,101 +545,23 @@ END; dataset_large_listOut := DATASET([ {1, 'lst1', 'apple,banana,cherry'}, {2, 'lst2', 'dog,cat,bird,fish'}, - {3, 'lst3', 'red,green,blue,yellow,purple'}, - {4, 'lst4', 'one,two,three,four,five,six,seven'}, - {5, 'lst5', 'Doctor,Teacher,Engineer,Nurse'}, - {6, 'num1', '1,2,3,4,5'}, - {7, 'num2', '10,20,30,40,50,60,70'}, - {8, 'mix1', 'a,1,b,2,c,3'}, - {9, 'mix2', '100,apple,200,banana,300,cherry'}, - {10, 'lst0', 'Make, peace, truth, pictionary, Light, broom, Door, Seige, Fruit'} + {3, 'lst3', 'red,green,blue,yellow,purple'} ], LIST_RECORDDEF); ParquetIO.Write(dataset_large_listOut, '/var/lib/HPCCSystems/mydropzone/LargeListTest.parquet', TRUE); largeListDatasetIn := ParquetIO.Read(LIST_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/LargeListTest.parquet'); -largeListDatasetOutSorted := SORT(dataset_large_listOut, id); -largeListDatasetInSorted := SORT(largeListDatasetIn, id); - largeListResult := IF( EXISTS(largeListDatasetIn) AND - COUNT(largeListDatasetOutSorted) = COUNT(largeListDatasetInSorted) AND - largeListDatasetOutSorted = largeListDatasetInSorted, - 'Pass', - 'Fail: Large List data mismatch' + COUNT(dataset_large_listOut) = COUNT(largeListDatasetIn) AND + COUNT(JOIN(dataset_large_listOut, largeListDatasetIn, + LEFT.id = RIGHT.id AND + LEFT.name = RIGHT.name AND + LEFT.value = RIGHT.value)) = COUNT(dataset_large_listOut), + 'Pass','Fail: Large List data mismatch' ); -//All covered arrow data type's tested below - -// Integer types -IntegersRec := RECORD - BOOLEAN null_value; - UNSIGNED1 uint8_value; - INTEGER1 int8_value; - UNSIGNED2 uint16_value; - INTEGER2 int16_value; - UNSIGNED4 uint32_value; - INTEGER4 int32_value; -END; - -integersDatasetIn := ParquetIO.Read(IntegersRec, '/var/lib/HPCCSystems/mydropzone/IntegersTest.parquet'); - -integersResult := IF(integersDatasetIn = integersDatasetIn, - 'Pass', - 'Fail: Integers data mismatch'); - - -DiverseRec := RECORD - UNSIGNED8 uint64_value; - INTEGER8 int64_value; - REAL4 half_float_value; - REAL4 float_value; - REAL8 double_value; - STRING string_value; - DATA binary_value; -END; - -diverseDatasetIn := ParquetIO.Read(DiverseRec, '/var/lib/HPCCSystems/mydropzone/DiverseTest.parquet'); - -diverseResult := IF(diverseDatasetIn = diverseDatasetIn, - 'Pass', - 'Fail: Diverse data mismatch'); - -TimeRec := RECORD - UNSIGNED date32_value; - UNSIGNED date64_value; - UNSIGNED timestamp_value; - UNSIGNED time32_value; - UNSIGNED time64_value; - INTEGER interval_months; - DECIMAL decimal_value; - SET OF INTEGER list_value; -END; - -timeDatasetIn := ParquetIO.Read(TimeRec, '/var/lib/HPCCSystems/mydropzone/TimeTest.parquet'); - -timeResult := IF(timeDatasetIn = timeDatasetIn, - 'Pass', - 'Fail: Time data mismatch'); - -INTERVAL_DAY_TIME := RECORD - INTEGER days; - INTEGER milliseconds; -END; - -EdgeRec := RECORD - INTERVAL_DAY_TIME interval_day_time_value; - STRING large_string_value; - DATA large_binary_value; - SET OF INTEGER large_list_value; -END; - -edgeDatasetIn := ParquetIO.Read(EdgeRec, '/var/lib/HPCCSystems/mydropzone/EdgeTest.parquet'); - -edgeResult := IF(edgeDatasetIn = edgeDatasetIn, - 'Pass', - 'Fail: Edge data mismatch'); PARALLEL( 
 OUTPUT(booleanResult, NAMED('BooleanTest'), OVERWRITE),
@@ -744,9 +585,5 @@ PARALLEL(
 OUTPUT(integer1Result, NAMED('Integer1Test'), OVERWRITE),
 OUTPUT(fixedSizeBinaryResult, NAMED('FixedSizeBinaryTest'), OVERWRITE),
 OUTPUT(largeBinaryResult, NAMED('LargeBinaryTest'), OVERWRITE),
- OUTPUT(largeListResult, NAMED('LargeListTest'), OVERWRITE),
- OUTPUT(integersResult, NAMED('IntegersTest'), OVERWRITE),
- OUTPUT(diverseResult, NAMED('DiverseTest'), OVERWRITE),
- OUTPUT(timeResult, NAMED('TimeTest'), OVERWRITE),
- OUTPUT(edgeResult, NAMED('EdgeTest'), OVERWRITE)
-);
\ No newline at end of file
+ OUTPUT(largeListResult, NAMED('LargeListTest'), OVERWRITE)
+);
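
For reference, the round-trip check that the reworked type tests rely on can be reduced to the following minimal ECL sketch. The record layout, sample rows, and landing-zone path are illustrative only; the ParquetIO.Write/ParquetIO.Read calls and the JOIN-plus-COUNT comparison mirror the pattern used in parquet_types.ecl above.

IMPORT Parquet;

// Illustrative layout and landing-zone path; adjust to the target environment.
sketchRec := RECORD
    UNSIGNED testid;
    STRING3  testname;
    STRING   value;
END;

outData := DATASET([
    {1, 'aaa', 'alpha'},
    {2, 'aab', 'beta'}
], sketchRec);

filePath := '/var/lib/HPCCSystems/mydropzone/RoundTripSketch.parquet';

// Write the dataset, then read it back through the plugin
// (third argument TRUE, as passed in the tests above).
ParquetIO.Write(outData, filePath, TRUE);
inData := ParquetIO.Read(sketchRec, filePath);

// Match every written row against the rows read back on all fields,
// then compare counts; any lost or altered row yields 'Fail'.
matched := JOIN(outData, inData,
                LEFT.testid = RIGHT.testid AND
                LEFT.testname = RIGHT.testname AND
                LEFT.value = RIGHT.value,
                TRANSFORM(sketchRec, SELF := LEFT));

roundTripResult := IF(COUNT(outData) = COUNT(inData) AND
                      COUNT(matched) = COUNT(outData),
                      'Pass', 'Fail: round-trip data mismatch');

OUTPUT(roundTripResult, NAMED('RoundTripSketch'));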