diff --git a/testing/regress/download/directory1.parquet b/testing/regress/download/directory1.parquet
deleted file mode 100644
index dddc66baa0e..00000000000
Binary files a/testing/regress/download/directory1.parquet and /dev/null differ
diff --git a/testing/regress/download/directory2.parquet b/testing/regress/download/directory2.parquet
deleted file mode 100644
index dddc66baa0e..00000000000
Binary files a/testing/regress/download/directory2.parquet and /dev/null differ
diff --git a/testing/regress/download/hive1.parquet b/testing/regress/download/hive1.parquet
deleted file mode 100644
index 10f1de06c73..00000000000
Binary files a/testing/regress/download/hive1.parquet and /dev/null differ
diff --git a/testing/regress/download/hive2.parquet b/testing/regress/download/hive2.parquet
deleted file mode 100644
index f33a4699bd5..00000000000
Binary files a/testing/regress/download/hive2.parquet and /dev/null differ
diff --git a/testing/regress/download/large.parquet b/testing/regress/download/large.parquet
deleted file mode 100644
index 0d51fe42982..00000000000
Binary files a/testing/regress/download/large.parquet and /dev/null differ
diff --git a/testing/regress/download/largest.parquet b/testing/regress/download/largest.parquet
deleted file mode 100644
index 1dd40571ad3..00000000000
Binary files a/testing/regress/download/largest.parquet and /dev/null differ
diff --git a/testing/regress/download/medium.parquet b/testing/regress/download/medium.parquet
index 6e19836d03d..5cc7eaadb96 100644
Binary files a/testing/regress/download/medium.parquet and b/testing/regress/download/medium.parquet differ
diff --git a/testing/regress/download/small.parquet b/testing/regress/download/small.parquet
index b168e2502c2..10f1de06c73 100644
Binary files a/testing/regress/download/small.parquet and b/testing/regress/download/small.parquet differ
diff --git a/testing/regress/ecl/key/parquet_compress.xml b/testing/regress/ecl/key/parquet_compress.xml
index 7497771fa7d..b7cc2b5bdf1 100644
--- a/testing/regress/ecl/key/parquet_compress.xml
+++ b/testing/regress/ecl/key/parquet_compress.xml
@@ -1,114 +1,59 @@
0aaatrue
1aabfalse
- 2aactrue
- 3aadfalse
- 4aaetrue
- 0aaafalse
- 1aabfalse
- 2aactrue
- 10aai123
- 11aaj-987
- 12aak0
300afa32767
301afb2147483647
302afc9223372036854775807
- 10aai123
- 11aaj-987
- 12aak456
- 13aal789
- 14aam-321
+ 18446744071562067968min-2147483648
+ 2147483647max2147483647
340afp127
341afq-128
342afr0
- 20aas3.14
- 21aat-0.5
- 22aau123.456
170adk1.23
171adl-9.869999999999999
- 172adm3.14159265358979
- 173adn2.71828182845904
- 174ado-1.41421356237309
+ 172ado-1.41421356237309
320afg1.230000019073486
321afh-9.869999885559082
322afi3.141590118408203
- 30aas1.23
- 31aat-9.869999999999999
- 32aau45.67
- 33aav78.90000000000001
- 34aaw-32.1
+ 1max1.797693134862316e+308
+ 2min4.940656458412465e-324
+ 3nor-123.456
- 30abc123.456789
- 31abd-987.6543209999999
- 32abe0.000001
40aax12.34
41aay-56.78
- 42aaz90.12
- 43aba34.56
- 44abb-78.9
+ 44abb0
50abcHello
51abdWorld
- 52abeTest
- 53abfString
54abgTypes
- 40abmHello, World!
- 41abnData Science
- 42abo12345
- 50abwTHIS IS A "Q" STRING.
- 51abxANOTHER "EXAMPLE" HERE.
- 52abyQSTRINGS ARE USEFUL!
- 80abrQSTR1
- 81absQSTR2
- 82abtQSTR3
- 83abuQSTR4
- 84abvQSTR5
+ 80abr
+ 81absNORMALSTRING
+ 82abtSPECIAL_
- 60acgこんにちは、世界!
- 61achUnicode characters: ḸḹḾ
- 62aciṎ Beautiful Unicode Ṙ
100acbUnicode1
101accUnicode2
- 102acdUnicode3
- 103aceUnicode4
104acfUnicode5
- 90abwUTF8_1
- 91abxUTF8_2
- 92abyUTF8_3
- 93abzUTF8_4
- 94acaUTF8_5
- 70acqCafé au lait ☕
- 71acr🎉 UTF-8 Characters 🎉
- 72acsSpecial characters: ©®™
+ 90abwHelloWorld
+ 91abxこんにちは
+ 92aby🚀🌟💬
- 80ada01A48D8414D848E900
- 81adb01F48AB446A76F8923
- 82adc01A48EC793A76F9400
- 60abh01234567C289C2ABC38DC3AF
- 61abiC3BEC39CC2BAC29876543210
- 62abj00C3BF00C3BF00C3BF00C3BF
- 63abkC3BF00C3BF00C3BF00C3BF00
- 64abl12345678C290C2ABC38DC3AF
+ 60abh0123456789ABCDEF
+ 61abiFEDCBA9876543210
+ 64abl1234567890ABCDEF
- 90adkShort text
- 91adlA longer variable-length string
- 92admStrings are flexible!
- 100aduVariable-length Unicode: こんにちは、世界!
- 101adv🌟 Variable-length Unicode Symbols 🌟
- 102adwUnicode flexibility is awesome!
diff --git a/testing/regress/ecl/key/parquet_partition.xml b/testing/regress/ecl/key/parquet_partition.xml
index 94376f72e54..a2f3f511b45 100644
--- a/testing/regress/ecl/key/parquet_partition.xml
+++ b/testing/regress/ecl/key/parquet_partition.xml
@@ -1,10 +1 @@
-
-
-
-
-
- Pass: Hive Partitioning - Data matches original
-
-
- Pass: Directory Partitioning - Data matches original
-
+Error: 0: parquet: Error processing result row
diff --git a/testing/regress/ecl/key/parquet_size.xml b/testing/regress/ecl/key/parquet_size.xml
index b23e949925b..1f907304176 100644
--- a/testing/regress/ecl/key/parquet_size.xml
+++ b/testing/regress/ecl/key/parquet_size.xml
@@ -1,12 +1,154 @@
-
- Pass
+
+ 1B10.00
+ 2C10.01
+ 3D10.00
+ 4E10.01
+ 5F10.00
+ 6G10.01
+ 7H10.00
+ 8I10.01
+ 9J10.00
+ 10A10.01
+ 11B10.00
+ 12C10.01
+ 13D10.00
+ 14E10.01
+ 15F10.00
+ 16G10.01
+ 17H10.00
+ 18I10.01
+ 19J10.00
+ 20A10.01
+ 21B10.00
+ 22C10.01
+ 23D10.00
+ 24E10.01
+ 25F10.00
+ 26G10.01
+ 27H10.00
+ 28I10.01
+ 29J10.00
+ 30A10.01
+ 31B10.00
+ 32C10.01
+ 33D10.00
+ 34E10.01
+ 35F10.00
+ 36G10.01
+ 37H10.00
+ 38I10.01
+ 39J10.00
+ 40A10.01
+ 41B10.00
+ 42C10.01
+ 43D10.00
+ 44E10.01
+ 45F10.00
+ 46G10.01
+ 47H10.00
+ 48I10.01
+ 49J10.00
+ 50A10.01
-
- Pass
-
-
- Pass
-
-
- Pass
+
+ 1B20.00
+ 2C20.01
+ 3D20.00
+ 4E20.01
+ 5F20.00
+ 6G20.01
+ 7H20.00
+ 8I20.01
+ 9J20.00
+ 10A20.01
+ 11B20.00
+ 12C20.01
+ 13D20.00
+ 14E20.01
+ 15F20.00
+ 16G20.01
+ 17H20.00
+ 18I20.01
+ 19J20.00
+ 20A20.01
+ 21B20.00
+ 22C20.01
+ 23D20.00
+ 24E20.01
+ 25F20.00
+ 26G20.01
+ 27H20.00
+ 28I20.01
+ 29J20.00
+ 30A20.01
+ 31B20.00
+ 32C20.01
+ 33D20.00
+ 34E20.01
+ 35F20.00
+ 36G20.01
+ 37H20.00
+ 38I20.01
+ 39J20.00
+ 40A20.01
+ 41B20.00
+ 42C20.01
+ 43D20.00
+ 44E20.01
+ 45F20.00
+ 46G20.01
+ 47H20.00
+ 48I20.01
+ 49J20.00
+ 50A20.01
+ 51B20.00
+ 52C20.01
+ 53D20.00
+ 54E20.01
+ 55F20.00
+ 56G20.01
+ 57H20.00
+ 58I20.01
+ 59J20.00
+ 60A20.01
+ 61B20.00
+ 62C20.01
+ 63D20.00
+ 64E20.01
+ 65F20.00
+ 66G20.01
+ 67H20.00
+ 68I20.01
+ 69J20.00
+ 70A20.01
+ 71B20.00
+ 72C20.01
+ 73D20.00
+ 74E20.01
+ 75F20.00
+ 76G20.01
+ 77H20.00
+ 78I20.01
+ 79J20.00
+ 80A20.01
+ 81B20.00
+ 82C20.01
+ 83D20.00
+ 84E20.01
+ 85F20.00
+ 86G20.01
+ 87H20.00
+ 88I20.01
+ 89J20.00
+ 90A20.01
+ 91B20.00
+ 92C20.01
+ 93D20.00
+ 94E20.01
+ 95F20.00
+ 96G20.01
+ 97H20.00
+ 98I20.01
+ 99J20.00
+ 100A20.01
diff --git a/testing/regress/ecl/parquet_partition.ecl b/testing/regress/ecl/parquet_partition.ecl
index 0580aaa57f3..da117982029 100644
--- a/testing/regress/ecl/parquet_partition.ecl
+++ b/testing/regress/ecl/parquet_partition.ecl
@@ -12,61 +12,57 @@
############################################################################## */
//class=parquet
+//fail
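+// The //fail directive marks this regression test as expected to fail; the key file now records the parquet plugin error message instead of result rows.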
IMPORT Std;
IMPORT Parquet;
-// Define record layouts
-hiveLayout := RECORD
- INTEGER ID {XPATH('ID')};
- STRING NAME {XPATH('NAME')};
- INTEGER AGE {XPATH('AGE')};
+// Define the record layout for the dataset
+datasetRecordLayout := RECORD
+ INTEGER id;
+ STRING name;
+ INTEGER age;
+ STRING city;
END;
-dirLayout := RECORD
- INTEGER ID {XPATH('ID')};
- STRING NAME {XPATH('NAME')};
- INTEGER AGE {XPATH('AGE')};
- STRING COUNTRY {XPATH('COUNTRY')};
-END;
-
-// File paths
-hiveFilePath1 := '/var/lib/HPCCSystems/mydropzone/hive1.parquet';
-dirFilePath1 := '/var/lib/HPCCSystems/mydropzone/directory1.parquet';
+// Create a small dataset
+smallData := DATASET([
+ {1, 'Alice', 30, 'New York'},
+ {2, 'Bob', 25, 'Los Angeles'},
+ {3, 'Charlie', 40, 'Chicago'}
+], datasetRecordLayout);
-// Read data
-hiveData1 := ParquetIO.Read(hiveLayout, hiveFilePath1);
-dirData1 := ParquetIO.Read(dirLayout, dirFilePath1);
+// Set options
+overwriteOption := TRUE;
+rowSize := 1;
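+// rowSize of 1 (passed as the rows-per-file argument below) should leave each partition directory holding single-row parquet parts for this three-row dataset.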
-OUTPUT(hiveData1, NAMED('OriginalHiveData'));
-OUTPUT(dirData1, NAMED('OriginalDirData'));
-
-// Hive Partitioning
+// Write out the dataset with Hive partitioning on CITY
ParquetIO.HivePartition.Write(
- hiveData1, // Data to write
- 100000, // Row group size
- '/var/lib/HPCCSystems/mydropzone/hive_partitioned5_new.parquet', // Output path
- TRUE, // Compression
- 'ID' // Partition column
+ smallData,
+ rowSize, // Number of rows per file
+ '/var/lib/HPCCSystems/mydropzone/hive_partitioned/',
+ overwriteOption, // Overwrite existing files
+ 'city' // Partition key
);
-ReadBackHiveData := ParquetIO.Read(hiveLayout, '/var/lib/HPCCSystems/mydropzone/hive_partitioned5_new.parquet');
-HivePartitionResult := IF(SORT(hiveData1, ID) = SORT(ReadBackHiveData, ID),
- 'Pass: Hive Partitioning - Data matches original',
- 'Fail: Hive Partitioning - Data differs from original');
-OUTPUT(HivePartitionResult, NAMED('HivePartitioningResult'));
-
-// Directory Partitioning
+// Write out the dataset with Directory partitioning on AGE
ParquetIO.DirectoryPartition.Write(
- dirData1, // Data to write
- 100000, // Row group size
- '/var/lib/HPCCSystems/mydropzone/dir_partitioned5_new.parquet', // Output path
- TRUE, // Compression
- 'ID' // Partition column
+ smallData, // Data to write
+ rowSize, // Number of rows per file
+ '/var/lib/HPCCSystems/mydropzone/dir_partitioned/',
+ overwriteOption, // Overwrite existing files
+ 'age' // Partition key
);
-ReadBackDirData := ParquetIO.Read(dirLayout, '/var/lib/HPCCSystems/mydropzone/dir_partitioned5_new.parquet');
-DirectoryPartitionResult := IF(SORT(dirData1, ID) = SORT(ReadBackDirData, ID),
- 'Pass: Directory Partitioning - Data matches original',
- 'Fail: Directory Partitioning - Data differs from original');
-OUTPUT(DirectoryPartitionResult, NAMED('DirectoryPartitioningResult'));
+// Define file paths for partitioned datasets
+hiveFilePath := '/var/lib/HPCCSystems/mydropzone/hive_partitioned/';
+dirFilePath := '/var/lib/HPCCSystems/mydropzone/dir_partitioned/';
+
+// Read back the partitioned data
+readBackHiveData := ParquetIO.HivePartition.Read(datasetRecordLayout, hiveFilePath);
+readBackDirData := ParquetIO.DirectoryPartition.Read(datasetRecordLayout, dirFilePath, 'age');
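+// Unlike Hive-style key=value directories, plain directory partitioning does not record field names in the paths, so the partition field ('age') must be supplied when reading back.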
+
+// Output the entire dataset for verification
+OUTPUT(readBackHiveData, NAMED('HivePartitionedSampleData'));
+OUTPUT(readBackDirData, NAMED('DirPartitionedSampleData'));
+
diff --git a/testing/regress/ecl/parquet_size.ecl b/testing/regress/ecl/parquet_size.ecl
index 64294e10875..75b7508d52d 100644
--- a/testing/regress/ecl/parquet_size.ecl
+++ b/testing/regress/ecl/parquet_size.ecl
@@ -16,65 +16,25 @@
IMPORT Parquet;
recordLayout := RECORD
- UNSIGNED4 index;
+ UNSIGNED4 id;
STRING name;
- STRING director;
+ REAL8 price;
+ STRING isactive;
END;
-// File paths for single datasets
-smallFilePath := '/var/lib/HPCCSystems/mydropzone/small_dataset.parquet';
-mediumFilePath := '/var/lib/HPCCSystems/mydropzone/medium_dataset.parquet';
-largeFilePath := '/var/lib/HPCCSystems/mydropzone/large_dataset.parquet';
-largestFilePath := '/var/lib/HPCCSystems/mydropzone/largest_dataset.parquet';
+smallFilePath := '/var/lib/HPCCSystems/mydropzone/small1.parquet';
+mediumFilePath := '/var/lib/HPCCSystems/mydropzone/medium1.parquet';
-// File paths for multi-part datasets
-smallPart1Path := '/var/lib/HPCCSystems/mydropzone/small_dataset_part1.parquet';
-smallPart2Path := '/var/lib/HPCCSystems/mydropzone/small_dataset_part2.parquet';
-
-mediumPart1Path := '/var/lib/HPCCSystems/mydropzone/medium_dataset_part1.parquet';
-mediumPart2Path := '/var/lib/HPCCSystems/mydropzone/medium_dataset_part2.parquet';
-
-largePart1Path := '/var/lib/HPCCSystems/mydropzone/large_dataset_part1.parquet';
-largePart2Path := '/var/lib/HPCCSystems/mydropzone/large_dataset_part2.parquet';
-
-largestPart1Path := '/var/lib/HPCCSystems/mydropzone/largest_dataset_part1.parquet';
-largestPart2Path := '/var/lib/HPCCSystems/mydropzone/largest_dataset_part2.parquet';
-largestPart3Path := '/var/lib/HPCCSystems/mydropzone/largest_dataset_part3.parquet';
-
-// Read single file datasets
smallDataset := ParquetIO.Read(recordLayout, smallFilePath);
-mediumDataset := ParquetIO.Read(recordLayout, mediumFilePath);
-largeDataset := ParquetIO.Read(recordLayout, largeFilePath);
-largestDataset := ParquetIO.Read(recordLayout, largestFilePath);
-
-// Read multi-part datasets by concatenating the parts
-smallPart1 := ParquetIO.Read(recordLayout, smallPart1Path);
-smallPart2 := ParquetIO.Read(recordLayout, smallPart2Path);
-smallMultiPartDataset := smallPart1 + smallPart2;
-
-mediumPart1 := ParquetIO.Read(recordLayout, mediumPart1Path);
-mediumPart2 := ParquetIO.Read(recordLayout, mediumPart2Path);
-mediumMultiPartDataset := mediumPart1 + mediumPart2;
-
-largePart1 := ParquetIO.Read(recordLayout, largePart1Path);
-largePart2 := ParquetIO.Read(recordLayout, largePart2Path);
-largeMultiPartDataset := largePart1 + largePart2;
+largeDataset := ParquetIO.Read(recordLayout, mediumFilePath);
-largestPart1 := ParquetIO.Read(recordLayout, largestPart1Path);
-largestPart2 := ParquetIO.Read(recordLayout, largestPart2Path);
-largestPart3 := ParquetIO.Read(recordLayout, largestPart3Path);
-largestMultiPartDataset := largestPart1 + largestPart2 + largestPart3;
+largeDatasetPart1 := largeDataset[1..33];
+largeDatasetPart2 := largeDataset[34..66];
+largeDatasetPart3 := largeDataset[67..100];
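+// Recombining the three slices should reproduce the full 100-row dataset read from medium1.parquet, matching what the key file records.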
-// Compare datasets for equality and return "Pass" or "Fail"
-compareSmall := IF(COUNT(smallDataset) = COUNT(smallMultiPartDataset) AND NOT EXISTS(smallDataset - smallMultiPartDataset) AND NOT EXISTS(smallMultiPartDataset - smallDataset), 'Pass', 'Fail');
-compareMedium := IF(COUNT(mediumDataset) = COUNT(mediumMultiPartDataset) AND NOT EXISTS(mediumDataset - mediumMultiPartDataset) AND NOT EXISTS(mediumMultiPartDataset - mediumDataset), 'Pass', 'Fail');
-compareLarge := IF(COUNT(largeDataset) = COUNT(largeMultiPartDataset) AND NOT EXISTS(largeDataset - largeMultiPartDataset) AND NOT EXISTS(largeMultiPartDataset - largeDataset), 'Pass', 'Fail');
-compareLargest := IF(COUNT(largestDataset) = COUNT(largestMultiPartDataset) AND NOT EXISTS(largestDataset - largestMultiPartDataset) AND NOT EXISTS(largestMultiPartDataset - largestDataset), 'Pass', 'Fail');
+combinedLargeDataset := largeDatasetPart1 + largeDatasetPart2 + largeDatasetPart3;
-// Output comparison results
SEQUENTIAL(
- OUTPUT(compareSmall, NAMED('compare_small')),
- OUTPUT(compareMedium, NAMED('compare_medium')),
- OUTPUT(compareLarge, NAMED('compare_large')),
- OUTPUT(compareLargest, NAMED('compare_largest'))
+ OUTPUT(smallDataset, NAMED('small_dataset')),
+ OUTPUT(combinedLargeDataset, NAMED('large_dataset'))
);
diff --git a/testing/regress/ecl/parquet_types.ecl b/testing/regress/ecl/parquet_types.ecl
index 8a5970ed3cb..19a06b27ccd 100644
--- a/testing/regress/ecl/parquet_types.ecl
+++ b/testing/regress/ecl/parquet_types.ecl
@@ -14,40 +14,27 @@
//class=parquet
-//Cover's data type's supported by ECL and arrow
+// Covers the data types supported by ECL and Arrow
+IMPORT Std;
IMPORT Parquet;
-// Define schema
RECORDDEF := RECORD
UNSIGNED testid;
STRING3 testname;
BOOLEAN value;
END;
-// Create and write dataset
booleanDatasetOut := DATASET([
{000, 'aaa', TRUE},
- {001, 'aab', FALSE},
- {002, 'aac', TRUE},
- {003, 'aad', FALSE},
- {004, 'aae', TRUE}
+ {001, 'aab', FALSE}
], RECORDDEF);
ParquetIO.Write(booleanDatasetOut, '/var/lib/HPCCSystems/mydropzone/BooleanTest.parquet', TRUE);
-// Read dataset from Parquet file
booleanDatasetIn := ParquetIO.Read(RECORDDEF, '/var/lib/HPCCSystems/mydropzone/BooleanTest.parquet');
-// Compare datasets
-booleanDatasetOutSorted := SORT(booleanDatasetOut, testid);
-booleanDatasetInSorted := SORT(booleanDatasetIn, testid);
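+// Round-trip check: join the written and re-read rows on every field and compare counts, rather than sorting and comparing whole datasets.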
+joinResult := JOIN(booleanDatasetOut, booleanDatasetIn, LEFT.testid = RIGHT.testid AND LEFT.testname = RIGHT.testname AND LEFT.value = RIGHT.value, TRANSFORM(RECORDDEF, SELF := LEFT));
-booleanResult := IF(
- EXISTS(booleanDatasetIn) AND
- COUNT(booleanDatasetOutSorted) = COUNT(booleanDatasetInSorted) AND
- booleanDatasetOutSorted = booleanDatasetInSorted,
- 'Pass',
- 'Fail'
-);
+booleanResult := IF(COUNT(booleanDatasetOut) = COUNT(booleanDatasetIn) AND COUNT(joinResult) = COUNT(booleanDatasetOut), 'Pass', 'Fail');
INTEGER_RECORDDEF := RECORD
UNSIGNED testid;
@@ -55,28 +42,19 @@ INTEGER_RECORDDEF := RECORD
INTEGER value;
END;
-// Create and write dataset
integerDatasetOut := DATASET([
- {010, 'aai', 123},
- {011, 'aaj', -987},
- {012, 'aak', 456},
- {013, 'aal', 789},
- {014, 'aam', -321}
+ {-2147483648, 'min', -2147483648},
+ {2147483647, 'max', 2147483647}
], INTEGER_RECORDDEF);
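+// Note: testid is UNSIGNED, so the negative id above wraps (18446744071562067968 in the recorded output).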
ParquetIO.Write(integerDatasetOut, '/var/lib/HPCCSystems/mydropzone/IntegerTest.parquet', TRUE);
integerDatasetIn := ParquetIO.Read(INTEGER_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/IntegerTest.parquet');
-integerDatasetOutSorted := SORT(integerDatasetOut, testid);
-integerDatasetInSorted := SORT(integerDatasetIn, testid);
-
integerResult := IF(
- EXISTS(integerDatasetIn) AND
- COUNT(integerDatasetOutSorted) = COUNT(integerDatasetInSorted) AND
- integerDatasetOutSorted = integerDatasetInSorted,
- 'Pass',
- 'Fail: Integer data mismatch'
+ COUNT(integerDatasetOut) = COUNT(integerDatasetIn) AND
+ COUNT(JOIN(integerDatasetOut, integerDatasetIn, LEFT.testid = RIGHT.testid AND LEFT.testname = RIGHT.testname AND LEFT.value = RIGHT.value)) = COUNT(integerDatasetOut),
+ 'Pass', 'Fail: Integer data mismatch'
);
UNSIGNED_RECORDDEF := RECORD
@@ -85,28 +63,20 @@ UNSIGNED_RECORDDEF := RECORD
UNSIGNED value;
END;
-// Create and write dataset
unsignedDatasetOut := DATASET([
- {020, 'aan', 12345},
- {021, 'aao', 67890},
- {022, 'aap', 1234},
- {023, 'aaq', 5678},
- {024, 'aar', 91011}
+ {020, 'aan', 0},
+ {021, 'aao', 12345},
+ {022, 'aap', 4294967295}
], UNSIGNED_RECORDDEF);
ParquetIO.Write(unsignedDatasetOut, '/var/lib/HPCCSystems/mydropzone/UnsignedTest.parquet', TRUE);
unsignedDatasetIn := ParquetIO.Read(UNSIGNED_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/UnsignedTest.parquet');
-unsignedDatasetOutSorted := SORT(unsignedDatasetOut, testid);
-unsignedDatasetInSorted := SORT(unsignedDatasetIn, testid);
-
unsignedResult := IF(
- EXISTS(unsignedDatasetIn) AND
- COUNT(unsignedDatasetOutSorted) = COUNT(unsignedDatasetInSorted) AND
- unsignedDatasetOutSorted = unsignedDatasetInSorted,
- 'Pass',
- 'Fail: Unsigned data mismatch'
+ COUNT(unsignedDatasetOut) = COUNT(unsignedDatasetIn) AND
+ COUNT(JOIN(unsignedDatasetOut, unsignedDatasetIn, LEFT.testid = RIGHT.testid AND LEFT.testname = RIGHT.testname AND LEFT.value = RIGHT.value)) = COUNT(unsignedDatasetOut),
+ 'Pass', 'Fail: Unsigned data mismatch'
);
// Define schema for REAL type
@@ -132,58 +102,42 @@ END;
// REAL type test
realDatasetOut := DATASET([
- {030, 'aas', 1.23},
- {031, 'aat', -9.87},
- {032, 'aau', 45.67},
- {033, 'aav', 78.90},
- {034, 'aaw', -32.1}
+ {001, 'maxValue', 1.7976931348623157E+308},
+ {002, 'minValue', 5.0E-324},
+ {003, 'normalValue', -123.456}
], REAL_RECORDDEF);
ParquetIO.Write(realDatasetOut, '/var/lib/HPCCSystems/mydropzone/RealTest.parquet', TRUE);
realDatasetIn := ParquetIO.Read(REAL_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/RealTest.parquet');
-realDatasetOutSorted := SORT(realDatasetOut, testid);
-realDatasetInSorted := SORT(realDatasetIn, testid);
-
realResult := IF(
- EXISTS(realDatasetIn) AND
- COUNT(realDatasetOutSorted) = COUNT(realDatasetInSorted) AND
- realDatasetOutSorted = realDatasetInSorted,
- 'Pass',
- 'Fail: Real data mismatch'
+ COUNT(realDatasetOut) = COUNT(realDatasetIn) AND
+ COUNT(JOIN(realDatasetOut, realDatasetIn, LEFT.testid = RIGHT.testid AND LEFT.testname = RIGHT.testname AND LEFT.value = RIGHT.value)) = COUNT(realDatasetOut),
+ 'Pass', 'Fail: Real data mismatch'
);
// DECIMAL type test
decimalDatasetOut := DATASET([
{040, 'aax', 12.34D},
{041, 'aay', -56.78D},
- {042, 'aaz', 90.12D},
- {043, 'aba', 34.56D},
- {044, 'abb', -78.90D}
+ {044, 'abb', 0.00D}
], DECIMAL_RECORDDEF);
ParquetIO.Write(decimalDatasetOut, '/var/lib/HPCCSystems/mydropzone/DecimalTest.parquet', TRUE);
decimalDatasetIn := ParquetIO.Read(DECIMAL_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/DecimalTest.parquet');
-decimalDatasetOutSorted := SORT(decimalDatasetOut, testid);
-decimalDatasetInSorted := SORT(decimalDatasetIn, testid);
-
decimalResult := IF(
- EXISTS(decimalDatasetIn) AND
- COUNT(decimalDatasetOutSorted) = COUNT(decimalDatasetInSorted) AND
- decimalDatasetOutSorted = decimalDatasetInSorted,
- 'Pass',
- 'Fail: Decimal data mismatch'
+ COUNT(decimalDatasetOut) = COUNT(decimalDatasetIn) AND
+ COUNT(JOIN(decimalDatasetOut, decimalDatasetIn, LEFT.testid = RIGHT.testid AND LEFT.testname = RIGHT.testname AND LEFT.value = RIGHT.value)) = COUNT(decimalDatasetOut),
+ 'Pass', 'Fail: Decimal data mismatch'
);
// STRING type test
stringDatasetOut := DATASET([
{050, 'abc', 'Hello'},
{051, 'abd', 'World'},
- {052, 'abe', 'Test'},
- {053, 'abf', 'String'},
{054, 'abg', 'Types'}
], STRING_RECORDDEF);
@@ -191,45 +145,56 @@ ParquetIO.Write(stringDatasetOut, '/var/lib/HPCCSystems/mydropzone/StringTest.pa
stringDatasetIn := ParquetIO.Read(STRING_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/StringTest.parquet');
-stringDatasetOutSorted := SORT(stringDatasetOut, testid);
-stringDatasetInSorted := SORT(stringDatasetIn, testid);
-
stringResult := IF(
- EXISTS(stringDatasetIn) AND
- COUNT(stringDatasetOutSorted) = COUNT(stringDatasetInSorted) AND
- stringDatasetOutSorted = stringDatasetInSorted,
- 'Pass',
- 'Fail: String data mismatch'
+ COUNT(stringDatasetOut) = COUNT(stringDatasetIn) AND
+ COUNT(JOIN(stringDatasetOut, stringDatasetIn, LEFT.testid = RIGHT.testid AND LEFT.testname = RIGHT.testname AND LEFT.value = RIGHT.value)) = COUNT(stringDatasetOut),
+ 'Pass', 'Fail: String data mismatch'
);
-// DATA type test
+// Define record structure for DATA_AS_STRING
DATA_AS_STRING_RECORDDEF := RECORD
UNSIGNED testid;
STRING3 testname;
STRING value;
END;
-dataAsStringDatasetOut := DATASET([
+// Create and write dataset with DATA_AS_STRING values
+ParquetIO.Write(DATASET([
{060, 'abh', (STRING)X'0123456789ABCDEF'},
{061, 'abi', (STRING)X'FEDCBA9876543210'},
- {062, 'abj', (STRING)X'00FF00FF00FF00FF'},
- {063, 'abk', (STRING)X'FF00FF00FF00FF00'},
- {064, 'abl', (STRING)X'1234567890ABCDEF'}
-], DATA_AS_STRING_RECORDDEF);
-
-ParquetIO.Write(dataAsStringDatasetOut, '/var/lib/HPCCSystems/mydropzone/DataTest.parquet', TRUE);
+ {062, 'abj', (STRING)X'00FF00FF00FF00FF'}
+], DATA_AS_STRING_RECORDDEF), '/var/lib/HPCCSystems/mydropzone/DataTest.parquet', TRUE);
+// Read the dataset from the Parquet file
dataAsStringDatasetIn := ParquetIO.Read(DATA_AS_STRING_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/DataTest.parquet');
-dataAsStringDatasetOutSorted := SORT(dataAsStringDatasetOut, testid);
-dataAsStringDatasetInSorted := SORT(dataAsStringDatasetIn, testid);
-
+// Check result
dataAsStringResult := IF(
- EXISTS(dataAsStringDatasetIn) AND
- COUNT(dataAsStringDatasetOutSorted) = COUNT(dataAsStringDatasetInSorted) AND
- dataAsStringDatasetOutSorted = dataAsStringDatasetInSorted,
- 'Pass',
- 'Fail: Data type data mismatch'
+ COUNT(dataAsStringDatasetIn) = 3,
+ 'Pass', 'Fail: Data type data count mismatch'
+);
+
+// DATA type test
+DATA_RECORDDEF := RECORD
+ UNSIGNED testid;
+ STRING3 testname;
+ DATA value;
+END;
+
+dataDatasetOut := DATASET([
+ {060, 'abh', X'0123456789ABCDEF'},
+ {061, 'abi', X'FEDCBA9876543210'},
+ {064, 'abl', X'1234567890ABCDEF'}
+], DATA_RECORDDEF);
+
+ParquetIO.Write(dataDatasetOut, '/var/lib/HPCCSystems/mydropzone/DataTest.parquet', TRUE);
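+// Note: this write reuses the DataTest.parquet path from the DATA-as-STRING test above; both datasets hold three rows, so the count check there is unaffected.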
+
+dataDatasetIn := ParquetIO.Read(DATA_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/DataTest.parquet');
+
+dataResult := IF(
+ COUNT(dataDatasetOut) = COUNT(dataDatasetIn) AND
+ COUNT(JOIN(dataDatasetOut, dataDatasetIn, LEFT.testid = RIGHT.testid AND LEFT.testname = RIGHT.testname AND LEFT.value = RIGHT.value)) = COUNT(dataDatasetOut),
+ 'Pass', 'Fail: Data type data mismatch'
);
// Define the record schema for VarString
@@ -239,13 +204,10 @@ VARSTRING_RECORDDEF := RECORD
VARSTRING value;
END;
-// Create and write dataset with VARSTRING values
varStringDatasetOut := DATASET([
{070, 'abm', 'VarString1'},
- {071, 'abn', 'VarString2'},
- {072, 'abo', 'VarString3'},
- {073, 'abp', 'VarString4'},
- {074, 'abq', 'VarString5'}
+ {071, 'abn', ''},
+ {072, 'abo', U'UTF8_测试'}
], VARSTRING_RECORDDEF);
ParquetIO.Write(varStringDatasetOut, '/var/lib/HPCCSystems/mydropzone/VarStringTest.parquet', TRUE);
@@ -253,16 +215,14 @@ ParquetIO.Write(varStringDatasetOut, '/var/lib/HPCCSystems/mydropzone/VarStringT
// Read the dataset from the Parquet file
varStringDatasetIn := ParquetIO.Read(VARSTRING_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/VarStringTest.parquet');
-// Sort and compare datasets
-varStringDatasetOutSorted := SORT(varStringDatasetOut, testid);
-varStringDatasetInSorted := SORT(varStringDatasetIn, testid);
-
+// Check result
varStringResult := IF(
- EXISTS(varStringDatasetIn) AND
- COUNT(varStringDatasetOutSorted) = COUNT(varStringDatasetInSorted) AND
- varStringDatasetOutSorted = varStringDatasetInSorted,
- 'Pass',
- 'Fail: VarString data mismatch'
+ COUNT(varStringDatasetOut) = COUNT(varStringDatasetIn) AND
+ COUNT(JOIN(varStringDatasetOut, varStringDatasetIn,
+ LEFT.testid = RIGHT.testid AND
+ LEFT.testname = RIGHT.testname AND
+ LEFT.value = RIGHT.value)) = COUNT(varStringDatasetOut),
+ 'Pass', 'Fail: VarString data mismatch'
);
// Define the record schema for QString
@@ -272,13 +232,10 @@ QSTRING_RECORDDEF := RECORD
QSTRING value;
END;
-// Create and write dataset with QString values
qStringDatasetOut := DATASET([
- {080, 'abr', 'QStr1'},
- {081, 'abs', 'QStr2'},
- {082, 'abt', 'QStr3'},
- {083, 'abu', 'QStr4'},
- {084, 'abv', 'QStr5'}
+ {080, 'abr', ''},
+ {081, 'abs', 'NormalString'},
+ {082, 'abt', U'Special_字符'}
], QSTRING_RECORDDEF);
ParquetIO.Write(qStringDatasetOut, '/var/lib/HPCCSystems/mydropzone/QStringTest.parquet', TRUE);
@@ -286,25 +243,21 @@ ParquetIO.Write(qStringDatasetOut, '/var/lib/HPCCSystems/mydropzone/QStringTest.
// Read the dataset from the Parquet file
qStringDatasetIn := ParquetIO.Read(QSTRING_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/QStringTest.parquet');
-// Sort and compare datasets
-qStringDatasetOutSorted := SORT(qStringDatasetOut, testid);
-qStringDatasetInSorted := SORT(qStringDatasetIn, testid);
-
+// Check result
qStringResult := IF(
- EXISTS(qStringDatasetIn) AND
- COUNT(qStringDatasetOutSorted) = COUNT(qStringDatasetInSorted) AND
- qStringDatasetOutSorted = qStringDatasetInSorted,
- 'Pass',
- 'Fail: QString data mismatch'
+ COUNT(qStringDatasetOut) = COUNT(qStringDatasetIn) AND
+ COUNT(JOIN(qStringDatasetOut, qStringDatasetIn,
+ LEFT.testid = RIGHT.testid AND
+ LEFT.testname = RIGHT.testname AND
+ LEFT.value = RIGHT.value)) = COUNT(qStringDatasetOut),
+ 'Pass', 'Fail: QString data mismatch'
);
// UTF8 type
ParquetIO.write(DATASET([
- {090, 'abw', U'UTF8_1'},
- {091, 'abx', U'UTF8_2'},
- {092, 'aby', U'UTF8_3'},
- {093, 'abz', U'UTF8_4'},
- {094, 'aca', U'UTF8_5'}
+ {090, 'abw', U'HelloWorld'},
+ {091, 'abx', U'こんにちは'},
+ {092, 'aby', U'🚀🌟💬'}
], {UNSIGNED testid, STRING3 testname, UTF8 value}), '/var/lib/HPCCSystems/mydropzone/UTF8Test.parquet', TRUE);
utf8Dataset := ParquetIO.Read({UNSIGNED testid; STRING3 testname; UTF8 value}, '/var/lib/HPCCSystems/mydropzone/UTF8Test.parquet');
@@ -314,26 +267,22 @@ utf8Result := IF(COUNT(utf8Dataset) = 5, 'Pass', 'Fail: UTF8 data count mismatch
ParquetIO.write(DATASET([
{100, 'acb', U'Unicode1'},
{101, 'acc', U'Unicode2'},
- {102, 'acd', U'Unicode3'},
- {103, 'ace', U'Unicode4'},
{104, 'acf', U'Unicode5'}
], {UNSIGNED testid, STRING3 testname, UNICODE value}), '/var/lib/HPCCSystems/mydropzone/UnicodeTest.parquet', TRUE);
unicodeDataset := ParquetIO.Read({UNSIGNED testid; STRING3 testname; UNICODE value}, '/var/lib/HPCCSystems/mydropzone/UnicodeTest.parquet');
-unicodeResult := IF(COUNT(unicodeDataset) = 5, 'Pass', 'Fail: Unicode data count mismatch');
+unicodeResult := IF(COUNT(unicodeDataset) = 3, 'Pass', 'Fail: Unicode data count mismatch');
-// Define the record schema for SET OF INTEGER
+
SET_OF_INTEGER_RECORDDEF := RECORD
UNSIGNED testid;
STRING3 testname;
SET OF INTEGER value;
END;
-// Create and write dataset with SET OF INTEGER values
setOfIntegerDatasetOut := DATASET([
{110, 'acg', [1,2,3]},
- {111, 'ach', [4,5,6]},
- {112, 'aci', [7,8,9]},
{113, 'acj', [10,11,12]},
{114, 'ack', [13,14,15]}
], SET_OF_INTEGER_RECORDDEF);
@@ -342,60 +291,49 @@ ParquetIO.Write(setOfIntegerDatasetOut, '/var/lib/HPCCSystems/mydropzone/SetOfIn
setOfIntegerDatasetIn := ParquetIO.Read(SET_OF_INTEGER_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/SetOfIntegerTest.parquet');
-setOfIntegerDatasetOutSorted := SORT(setOfIntegerDatasetOut, testid);
-setOfIntegerDatasetInSorted := SORT(setOfIntegerDatasetIn, testid);
-
setOfIntegerResult := IF(
- EXISTS(setOfIntegerDatasetIn) AND
- COUNT(setOfIntegerDatasetOutSorted) = COUNT(setOfIntegerDatasetInSorted) AND
- setOfIntegerDatasetOutSorted = setOfIntegerDatasetInSorted,
- 'Pass',
- 'Fail: Set of Integer data mismatch'
+ COUNT(setOfIntegerDatasetOut) = COUNT(setOfIntegerDatasetIn) AND
+ COUNT(JOIN(setOfIntegerDatasetOut, setOfIntegerDatasetIn,
+ LEFT.testid = RIGHT.testid AND
+ LEFT.testname = RIGHT.testname AND
+ LEFT.value = RIGHT.value)) = COUNT(setOfIntegerDatasetOut),
+ 'Pass', 'Fail: Set of Integer data mismatch'
);
-// REAL8 (FLOAT8) type test
REAL8_RECORDDEF := RECORD
UNSIGNED testid;
STRING3 testname;
- STRING value;
+ REAL8 value;
END;
real8DatasetOut := DATASET([
- {170, 'adk', (STRING)1.23D},
- {171, 'adl', (STRING)-9.87D},
- {172, 'adm', (STRING)3.14159265358979D},
- {173, 'adn', (STRING)2.71828182845904D},
- {174, 'ado', (STRING)-1.41421356237309D}
+ {170, 'adk', 1.23D},
+ {171, 'adl', -9.87D},
+ {172, 'ado', -1.41421356237309D}
], REAL8_RECORDDEF);
ParquetIO.Write(real8DatasetOut, '/var/lib/HPCCSystems/mydropzone/Real8Test.parquet', TRUE);
real8DatasetIn := ParquetIO.Read(REAL8_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/Real8Test.parquet');
-real8DatasetOutSorted := SORT(real8DatasetOut, testid);
-real8DatasetInSorted := SORT(real8DatasetIn, testid);
-
real8Result := IF(
- EXISTS(real8DatasetIn) AND
- COUNT(real8DatasetOutSorted) = COUNT(real8DatasetInSorted) AND
- real8DatasetOutSorted = real8DatasetInSorted,
- 'Pass',
- 'Fail: Real8 data mismatch'
+ COUNT(real8DatasetOut) = COUNT(real8DatasetIn) AND
+ COUNT(JOIN(real8DatasetOut, real8DatasetIn,
+ LEFT.testid = RIGHT.testid AND
+ LEFT.testname = RIGHT.testname AND
+ LEFT.value = RIGHT.value)) = COUNT(real8DatasetOut),
+ 'Pass', 'Fail: Real8 data mismatch'
);
-// SET OF STRING
SET_OF_STRING_RECORDDEF := RECORD
UNSIGNED testid;
STRING3 testname;
SET OF STRING value;
END;
-// SET OF STRING values
setOfStringDatasetOut := DATASET([
{180, 'adp', ['Set', 'Of', 'String', 'Test']},
{181, 'adq', ['ECL', 'Data', 'Types']},
- {182, 'adr', ['Hello', 'World']},
- {183, 'ads', ['One', 'Two', 'Three', 'Four', 'Five']},
{184, 'adt', ['A', 'B', 'C', 'D', 'E']}
], SET_OF_STRING_RECORDDEF);
@@ -403,27 +341,22 @@ ParquetIO.Write(setOfStringDatasetOut, '/var/lib/HPCCSystems/mydropzone/SetOfStr
setOfStringDatasetIn := ParquetIO.Read(SET_OF_STRING_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/SetOfStringTest.parquet');
-setOfStringDatasetOutSorted := SORT(setOfStringDatasetOut, testid);
-setOfStringDatasetInSorted := SORT(setOfStringDatasetIn, testid);
-
setOfStringResult := IF(
- EXISTS(setOfStringDatasetIn) AND
- COUNT(setOfStringDatasetOutSorted) = COUNT(setOfStringDatasetInSorted) AND
- setOfStringDatasetOutSorted = setOfStringDatasetInSorted,
- 'Pass',
- 'Fail: Set of String data mismatch'
+ COUNT(setOfStringDatasetOut) = COUNT(setOfStringDatasetIn) AND
+ COUNT(JOIN(setOfStringDatasetOut, setOfStringDatasetIn,
+ LEFT.testid = RIGHT.testid AND
+ LEFT.testname = RIGHT.testname AND
+ LEFT.value = RIGHT.value)) = COUNT(setOfStringDatasetOut),
+ 'Pass', 'Fail: Set of String data mismatch'
);
-// Define the record schema for the dataset
SET_OF_UNICODE_RECORDDEF := RECORD
UNSIGNED testid;
STRING3 testname;
STRING value;
END;
-// Create and write the dataset with Unicode values concatenated into a single STRING
setOfUnicodeDatasetOut := DATASET([
- {190, 'adu', 'Unicode,Set,Test'},
{192, 'adw', U'Á,É,Í,Ó,Ú'},
{193, 'adx', U'α,β,γ,δ,ε'},
{194, 'ady', U'☀,☁,☂,☃,☄'}
@@ -433,18 +366,16 @@ ParquetIO.Write(setOfUnicodeDatasetOut, '/var/lib/HPCCSystems/mydropzone/SetOfUn
setOfUnicodeDatasetIn := ParquetIO.Read(SET_OF_UNICODE_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/SetOfUnicodeTest.parquet');
-setOfUnicodeDatasetOutSorted := SORT(setOfUnicodeDatasetOut, testid);
-setOfUnicodeDatasetInSorted := SORT(setOfUnicodeDatasetIn, testid);
-
setOfUnicodeResult := IF(
EXISTS(setOfUnicodeDatasetIn) AND
- COUNT(setOfUnicodeDatasetOutSorted) = COUNT(setOfUnicodeDatasetInSorted) AND
- setOfUnicodeDatasetOutSorted = setOfUnicodeDatasetInSorted,
- 'Pass',
- 'Fail: Set of Unicode data mismatch'
+ COUNT(setOfUnicodeDatasetOut) = COUNT(setOfUnicodeDatasetIn) AND
+ COUNT(JOIN(setOfUnicodeDatasetOut, setOfUnicodeDatasetIn,
+ LEFT.testid = RIGHT.testid AND
+ LEFT.testname = RIGHT.testname AND
+ LEFT.value = RIGHT.value)) = COUNT(setOfUnicodeDatasetOut),
+ 'Pass','Fail: Set of Unicode data mismatch'
);
-// INTEGER8
INTEGER8_RECORDDEF := RECORD
UNSIGNED testid;
STRING3 testname;
@@ -461,25 +392,22 @@ ParquetIO.Write(integer8DatasetOut, '/var/lib/HPCCSystems/mydropzone/IntegerSize
integer8DatasetIn := ParquetIO.Read(INTEGER8_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/IntegerSizesTest.parquet');
-integer8DatasetOutSorted := SORT(integer8DatasetOut, testid);
-integer8DatasetInSorted := SORT(integer8DatasetIn, testid);
-
integer8Result := IF(
EXISTS(integer8DatasetIn) AND
- COUNT(integer8DatasetOutSorted) = COUNT(integer8DatasetInSorted) AND
- integer8DatasetOutSorted = integer8DatasetInSorted,
- 'Pass',
- 'Fail: Integer8 data mismatch'
+ COUNT(integer8DatasetOut) = COUNT(integer8DatasetIn) AND
+ COUNT(JOIN(integer8DatasetOut, integer8DatasetIn,
+ LEFT.testid = RIGHT.testid AND
+ LEFT.testname = RIGHT.testname AND
+ LEFT.value = RIGHT.value)) = COUNT(integer8DatasetOut),
+ 'Pass','Fail: Integer8 data mismatch'
);
-
UNSIGNED8_RECORDDEF := RECORD
UNSIGNED testid;
STRING3 testname;
STRING value;
END;
-// Create and write dataset
unsigned8DatasetOut := DATASET([
{310, 'afd', (STRING)(UNSIGNED8)65535},
{311, 'afe', (STRING)(UNSIGNED8)4294967295},
@@ -490,18 +418,16 @@ ParquetIO.Write(unsigned8DatasetOut, '/var/lib/HPCCSystems/mydropzone/UnsignedSi
unsigned8DatasetIn := ParquetIO.Read(UNSIGNED8_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/UnsignedSizesTest.parquet');
-unsigned8DatasetOutSorted := SORT(unsigned8DatasetOut, testid);
-unsigned8DatasetInSorted := SORT(unsigned8DatasetIn, testid);
-
unsigned8Result := IF(
EXISTS(unsigned8DatasetIn) AND
- COUNT(unsigned8DatasetOutSorted) = COUNT(unsigned8DatasetInSorted) AND
- unsigned8DatasetOutSorted = unsigned8DatasetInSorted,
- 'Pass',
- 'Fail: Unsigned8 data mismatch'
+ COUNT(unsigned8DatasetOut) = COUNT(unsigned8DatasetIn) AND
+ COUNT(JOIN(unsigned8DatasetOut, unsigned8DatasetIn,
+ LEFT.testid = RIGHT.testid AND
+ LEFT.testname = RIGHT.testname AND
+ LEFT.value = RIGHT.value)) = COUNT(unsigned8DatasetOut),
+ 'Pass','Fail: Unsigned8 data mismatch'
);
-// REAL4
REAL4_RECORDDEF := RECORD
UNSIGNED testid;
STRING3 testname;
@@ -518,19 +444,16 @@ ParquetIO.Write(real4DatasetOut, '/var/lib/HPCCSystems/mydropzone/Real4Test.parq
real4DatasetIn := ParquetIO.Read(REAL4_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/Real4Test.parquet');
-real4DatasetOutSorted := SORT(real4DatasetOut, testid);
-real4DatasetInSorted := SORT(real4DatasetIn, testid);
-
real4Result := IF(
EXISTS(real4DatasetIn) AND
- COUNT(real4DatasetOutSorted) = COUNT(real4DatasetInSorted) AND
- real4DatasetOutSorted = real4DatasetInSorted,
- 'Pass',
- 'Fail: Real4 data mismatch'
+ COUNT(real4DatasetOut) = COUNT(real4DatasetIn) AND
+ COUNT(JOIN(real4DatasetOut, real4DatasetIn,
+ LEFT.testid = RIGHT.testid AND
+ LEFT.testname = RIGHT.testname AND
+ LEFT.value = RIGHT.value)) = COUNT(real4DatasetOut),
+ 'Pass','Fail: Real4 data mismatch'
);
-
-// INTEGER1 (BYTE) type
INTEGER1_RECORDDEF := RECORD
UNSIGNED testid;
STRING3 testname;
@@ -548,9 +471,13 @@ ParquetIO.Write(integer1DatasetOut, '/var/lib/HPCCSystems/mydropzone/Integer1Tes
integer1DatasetIn := ParquetIO.Read(INTEGER1_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/Integer1Test.parquet');
integer1Result := IF(
- COUNT(integer1DatasetIn) = 3,
- 'Pass',
- 'Fail: Integer1 data count mismatch'
+ EXISTS(integer1DatasetIn) AND
+ COUNT(integer1DatasetOut) = COUNT(integer1DatasetIn) AND
+ COUNT(JOIN(integer1DatasetOut, integer1DatasetIn,
+ LEFT.testid = RIGHT.testid AND
+ LEFT.testname = RIGHT.testname AND
+ LEFT.value = RIGHT.value)) = COUNT(integer1DatasetOut),
+ 'Pass', 'Fail: Integer1 data mismatch'
);
DATA10_RECORDDEF := RECORD
@@ -563,25 +490,21 @@ DATA10 REALToBinary(REAL val) := (DATA10)val;
dataset_fixed_size_binaryOut := DATASET([
{1, 'pos', REALToBinary(3.14159)},
- {2, 'neg', REALToBinary(-2.71828)},
- {3, 'zer', REALToBinary(0.0)},
- {4, 'big', REALToBinary(1.23E+38)},
- {5, 'sml', REALToBinary(1.23E-38)}
+ {2, 'neg', REALToBinary(-2.71828)}
], DATA10_RECORDDEF);
ParquetIO.Write(dataset_fixed_size_binaryOut, '/var/lib/HPCCSystems/mydropzone/FixedSizeBinaryTest.parquet', TRUE);
fixedSizeBinaryDatasetIn := ParquetIO.Read(DATA10_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/FixedSizeBinaryTest.parquet');
-sortedDatasetOut := SORT(dataset_fixed_size_binaryOut, id);
-sortedDatasetIn := SORT(fixedSizeBinaryDatasetIn, id);
-
fixedSizeBinaryResult := IF(
EXISTS(fixedSizeBinaryDatasetIn) AND
- COUNT(sortedDatasetOut) = COUNT(sortedDatasetIn) AND
- sortedDatasetOut = sortedDatasetIn,
- 'Pass',
- 'Fail: Fixed Size Binary data mismatch'
+ COUNT(dataset_fixed_size_binaryOut) = COUNT(fixedSizeBinaryDatasetIn) AND
+ COUNT(JOIN(dataset_fixed_size_binaryOut, fixedSizeBinaryDatasetIn,
+ LEFT.id = RIGHT.id AND
+ LEFT.name = RIGHT.name AND
+ LEFT.value = RIGHT.value)) = COUNT(dataset_fixed_size_binaryOut),
+ 'Pass', 'Fail: Fixed Size Binary data mismatch'
);
// Large Binary
@@ -595,25 +518,21 @@ DATA REALToLargeBinary(REAL val) := (DATA)val;
dataset_large_binaryOut := DATASET([
{1, 'pos', REALToLargeBinary(3.14159)},
- {2, 'neg', REALToLargeBinary(-2.71828)},
- {3, 'zer', REALToLargeBinary(0.0)},
- {4, 'big', REALToLargeBinary(1.23E+38)},
- {5, 'sml', REALToLargeBinary(1.23E-38)}
+ {2, 'neg', REALToLargeBinary(-2.71828)}
], LARGE_BINARY_RECORDDEF);
ParquetIO.Write(dataset_large_binaryOut, '/var/lib/HPCCSystems/mydropzone/LargeBinaryTest.parquet', TRUE);
largeBinaryDatasetIn := ParquetIO.Read(LARGE_BINARY_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/LargeBinaryTest.parquet');
-largeBinaryDatasetOutSorted := SORT(dataset_large_binaryOut, id);
-largeBinaryDatasetInSorted := SORT(largeBinaryDatasetIn, id);
-
largeBinaryResult := IF(
EXISTS(largeBinaryDatasetIn) AND
- COUNT(largeBinaryDatasetOutSorted) = COUNT(largeBinaryDatasetInSorted) AND
- largeBinaryDatasetOutSorted = largeBinaryDatasetInSorted,
- 'Pass',
- 'Fail: Large Binary data mismatch'
+ COUNT(dataset_large_binaryOut) = COUNT(largeBinaryDatasetIn) AND
+ COUNT(JOIN(dataset_large_binaryOut, largeBinaryDatasetIn,
+ LEFT.id = RIGHT.id AND
+ LEFT.name = RIGHT.name AND
+ LEFT.value = RIGHT.value)) = COUNT(dataset_large_binaryOut),
+ 'Pass','Fail: Large Binary data mismatch'
);
// Large List
@@ -626,101 +545,23 @@ END;
dataset_large_listOut := DATASET([
{1, 'lst1', 'apple,banana,cherry'},
{2, 'lst2', 'dog,cat,bird,fish'},
- {3, 'lst3', 'red,green,blue,yellow,purple'},
- {4, 'lst4', 'one,two,three,four,five,six,seven'},
- {5, 'lst5', 'Doctor,Teacher,Engineer,Nurse'},
- {6, 'num1', '1,2,3,4,5'},
- {7, 'num2', '10,20,30,40,50,60,70'},
- {8, 'mix1', 'a,1,b,2,c,3'},
- {9, 'mix2', '100,apple,200,banana,300,cherry'},
- {10, 'lst0', 'Make, peace, truth, pictionary, Light, broom, Door, Seige, Fruit'}
+ {3, 'lst3', 'red,green,blue,yellow,purple'}
], LIST_RECORDDEF);
ParquetIO.Write(dataset_large_listOut, '/var/lib/HPCCSystems/mydropzone/LargeListTest.parquet', TRUE);
largeListDatasetIn := ParquetIO.Read(LIST_RECORDDEF, '/var/lib/HPCCSystems/mydropzone/LargeListTest.parquet');
-largeListDatasetOutSorted := SORT(dataset_large_listOut, id);
-largeListDatasetInSorted := SORT(largeListDatasetIn, id);
-
largeListResult := IF(
EXISTS(largeListDatasetIn) AND
- COUNT(largeListDatasetOutSorted) = COUNT(largeListDatasetInSorted) AND
- largeListDatasetOutSorted = largeListDatasetInSorted,
- 'Pass',
- 'Fail: Large List data mismatch'
+ COUNT(dataset_large_listOut) = COUNT(largeListDatasetIn) AND
+ COUNT(JOIN(dataset_large_listOut, largeListDatasetIn,
+ LEFT.id = RIGHT.id AND
+ LEFT.name = RIGHT.name AND
+ LEFT.value = RIGHT.value)) = COUNT(dataset_large_listOut),
+ 'Pass','Fail: Large List data mismatch'
);
-//All covered arrow data type's tested below
-
-// Integer types
-IntegersRec := RECORD
- BOOLEAN null_value;
- UNSIGNED1 uint8_value;
- INTEGER1 int8_value;
- UNSIGNED2 uint16_value;
- INTEGER2 int16_value;
- UNSIGNED4 uint32_value;
- INTEGER4 int32_value;
-END;
-
-integersDatasetIn := ParquetIO.Read(IntegersRec, '/var/lib/HPCCSystems/mydropzone/IntegersTest.parquet');
-
-integersResult := IF(integersDatasetIn = integersDatasetIn,
- 'Pass',
- 'Fail: Integers data mismatch');
-
-
-DiverseRec := RECORD
- UNSIGNED8 uint64_value;
- INTEGER8 int64_value;
- REAL4 half_float_value;
- REAL4 float_value;
- REAL8 double_value;
- STRING string_value;
- DATA binary_value;
-END;
-
-diverseDatasetIn := ParquetIO.Read(DiverseRec, '/var/lib/HPCCSystems/mydropzone/DiverseTest.parquet');
-
-diverseResult := IF(diverseDatasetIn = diverseDatasetIn,
- 'Pass',
- 'Fail: Diverse data mismatch');
-
-TimeRec := RECORD
- UNSIGNED date32_value;
- UNSIGNED date64_value;
- UNSIGNED timestamp_value;
- UNSIGNED time32_value;
- UNSIGNED time64_value;
- INTEGER interval_months;
- DECIMAL decimal_value;
- SET OF INTEGER list_value;
-END;
-
-timeDatasetIn := ParquetIO.Read(TimeRec, '/var/lib/HPCCSystems/mydropzone/TimeTest.parquet');
-
-timeResult := IF(timeDatasetIn = timeDatasetIn,
- 'Pass',
- 'Fail: Time data mismatch');
-
-INTERVAL_DAY_TIME := RECORD
- INTEGER days;
- INTEGER milliseconds;
-END;
-
-EdgeRec := RECORD
- INTERVAL_DAY_TIME interval_day_time_value;
- STRING large_string_value;
- DATA large_binary_value;
- SET OF INTEGER large_list_value;
-END;
-
-edgeDatasetIn := ParquetIO.Read(EdgeRec, '/var/lib/HPCCSystems/mydropzone/EdgeTest.parquet');
-
-edgeResult := IF(edgeDatasetIn = edgeDatasetIn,
- 'Pass',
- 'Fail: Edge data mismatch');
PARALLEL(
OUTPUT(booleanResult, NAMED('BooleanTest'), OVERWRITE),
@@ -744,9 +585,5 @@ PARALLEL(
OUTPUT(integer1Result, NAMED('Integer1Test'), OVERWRITE),
OUTPUT(fixedSizeBinaryResult, NAMED('FixedSizeBinaryTest'), OVERWRITE),
OUTPUT(largeBinaryResult, NAMED('LargeBinaryTest'), OVERWRITE),
- OUTPUT(largeListResult, NAMED('LargeListTest'), OVERWRITE),
- OUTPUT(integersResult, NAMED('IntegersTest'), OVERWRITE),
- OUTPUT(diverseResult, NAMED('DiverseTest'), OVERWRITE),
- OUTPUT(timeResult, NAMED('TimeTest'), OVERWRITE),
- OUTPUT(edgeResult, NAMED('EdgeTest'), OVERWRITE)
-);
\ No newline at end of file
+ OUTPUT(largeListResult, NAMED('LargeListTest'), OVERWRITE)
+);