Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/candidate-9.4.x'
Browse files Browse the repository at this point in the history
Signed-off-by: Gavin Halliday <[email protected]>
  • Loading branch information
ghalliday committed Oct 13, 2023
2 parents 9e04332 + d609af7 commit 98bda80
Show file tree
Hide file tree
Showing 15 changed files with 350 additions and 147 deletions.
15 changes: 14 additions & 1 deletion common/pkgfiles/referencedfilelist.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,15 @@ IPropertyTree *ReferencedFile::getRemoteStorageFileTree(IUserDescriptor *user, c

Owned<wsdfs::IDFSFile> dfsFile = wsdfs::lookupDFSFile(remoteLFN.str(), AccessMode::readSequential, INFINITE, wsdfs::keepAliveExpiryFrequency, user);
IPropertyTree *tree = (dfsFile) ? dfsFile->queryFileMeta() : nullptr;
return tree ? tree->getPropTree("File") : nullptr;
if (!tree)
{
DBGLOG("RemoteStorage FileMetaTree not found %s [remoteStorage=%s, prefix=%s]", remoteLFN.str(), nullText(remoteStorageName), nullText(remotePrefix));
return nullptr;
}
tree = tree->getPropTree("File");
if (!tree)
DBGLOG("RemoteStorage FileTree not found %s [remoteStorage=%s, prefix=%s]", remoteLFN.str(), nullText(remoteStorageName), nullText(remotePrefix));
return tree;
}

void ReferencedFile::resolveRemote(IUserDescriptor *user, const char *remoteStorageName, const char *remotePrefix, const StringArray &locations, const char *srcCluster, bool checkLocalFirst, StringArray *subfiles, bool resolveLFNForeign)
Expand Down Expand Up @@ -1061,6 +1069,7 @@ void ReferencedFileList::resolveFiles(const StringArray &locations, const char *

if (useRemoteStorage)
{
DBGLOG("ReferencedFileList resolving remote storage files at %s", nullText(remoteLocation));
if (!user)
user.setown(createUserDescriptor());
remoteStorage.set(remoteLocation);
Expand All @@ -1070,6 +1079,10 @@ void ReferencedFileList::resolveFiles(const StringArray &locations, const char *
}
else
{
if (!isEmptyString(remoteLocation))
DBGLOG("ReferencedFileList resolving remote dali files at %s", remoteLocation);
else
DBGLOG("ReferencedFileList resolving local files (no daliip)");
remote.setown(!isEmptyString(remoteLocation) ? createINode(remoteLocation, 7070) : nullptr);

ReferencedFileIterator files(this);
Expand Down
11 changes: 10 additions & 1 deletion dali/base/dautils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1489,7 +1489,16 @@ bool CDfsLogicalFileName::getExternalPath(StringBuffer &dir, StringBuffer &tail,
*e = makeStringExceptionV(-1, "plane:: does not support planes with more than one device '%s'", planeName.str());
return false;
}
dir.append(plane->queryPrefix());
const char * prefix = plane->queryPrefix();
//If the prefix is a PathSepChar, it should not be appended to the dir here because
//a PathSepChar will be appended to the dir inside the expandExternalPath() if the s
//is started with the "::".
//Also a trailing pathsepchar in the prefix should be removed.
if (!isRootDirectory(prefix))
{
dir.append(prefix);
removeTrailingPathSepChar(dir);
}
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion dockerfiles/platform-build/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ RUN groupadd -g 10001 hpcc
RUN useradd -s /bin/bash -r -m -N -c "hpcc runtime User" -u 10000 -g hpcc hpcc
RUN passwd -l hpcc

RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/v1.18.18/bin/linux/amd64/kubectl && chmod +x ./kubectl && mv ./kubectl /usr/local/bin
RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/v1.27.6/bin/linux/amd64/kubectl && chmod +x ./kubectl && mv ./kubectl /usr/local/bin

WORKDIR /hpcc-dev
RUN mkdir build
Expand Down
2 changes: 1 addition & 1 deletion dockerfiles/vcpkg/platform-core-ubuntu-22.04.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ RUN apt-get install -y \
gdb \
nano

RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/v1.18.18/bin/linux/amd64/kubectl && \
RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/v1.27.6/bin/linux/amd64/kubectl && \
chmod +x ./kubectl && \
mv ./kubectl /usr/local/bin

Expand Down
2 changes: 1 addition & 1 deletion dockerfiles/vcpkg/platform-core-ubuntu-22.04/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ RUN apt-get install -y \
gdb \
nano

RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/v1.18.18/bin/linux/amd64/kubectl && \
RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/v1.27.6/bin/linux/amd64/kubectl && \
chmod +x ./kubectl && \
mv ./kubectl /usr/local/bin

Expand Down
1 change: 1 addition & 0 deletions esp/services/ws_fileio/ws_fileioservice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ bool CWsFileIOEx::onReadFileData(IEspContext &context, IEspReadFileDataRequest &
return true;
}

//Despite the "DestRelativePath" saying it's relative for legacy reason, it also supports absolute paths.
const char* destRelativePath = req.getDestRelativePath();
if (!destRelativePath || (destRelativePath[0] == 0))
{
Expand Down
16 changes: 10 additions & 6 deletions esp/smc/SMCLib/TpCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,6 @@ extern TPWRAPPER_API IPropertyTree* getDropZoneAndValidateHostAndPath(const char

static SecAccessFlags getDropZoneScopePermissions(IEspContext& context, const IPropertyTree* dropZone, const char* dropZonePath)
{
if (isEmptyString(dropZonePath))
throw makeStringException(ECLWATCH_INVALID_CLUSTER_NAME, "getDropZoneScopePermissions(): DropZone path must be specified.");

//If the dropZonePath is an absolute path, change it to a relative path.
if (isAbsolutePath(dropZonePath))
{
Expand All @@ -203,9 +200,6 @@ static SecAccessFlags getDropZoneScopePermissions(IEspContext& context, const IP

static SecAccessFlags getDZPathScopePermissions(IEspContext& context, const char* dropZoneName, const char* dropZonePath, const char* dropZoneHost)
{
if (isEmptyString(dropZonePath))
throw makeStringException(ECLWATCH_INVALID_CLUSTER_NAME, "getDZPathScopePermissions(): DropZone path must be specified.");

Owned<IPropertyTree> dropZone;
if (isEmptyString(dropZoneName))
{
Expand Down Expand Up @@ -348,6 +342,16 @@ extern TPWRAPPER_API void validateDropZoneAccess(IEspContext& context, const cha
if (!isHostInPlane(dropZone, hostReq, true))
throw makeStringExceptionV(ECLWATCH_INVALID_INPUT, "Host %s is not valid DropZone plane %s", hostReq, targetDZNameOrHost);
}

//If the dropZonePath is an absolute path, change it to a relative path.
if (isAbsolutePath(fileNameWithRelPath))
{
const char* relativePath = getRelativePath(fileNameWithRelPath, dropZone->queryProp("@prefix"));
if (nullptr == relativePath)
throw makeStringExceptionV(ECLWATCH_INVALID_INPUT, "Invalid DropZone path %s.", fileNameWithRelPath);
fileNameWithRelPath = relativePath;
}

const char *dropZoneName = dropZone->queryProp("@name");
SecAccessFlags permission = getDZFileScopePermissions(context, dropZoneName, fileNameWithRelPath, hostReq);
if ((permission < permissionReq) && getGlobalConfigSP()->getPropBool("expert/@failOverToLegacyPhysicalPerms", !isContainerized()))
Expand Down
19 changes: 10 additions & 9 deletions plugins/parquet/examples/nested_io.ecl
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,16 @@ parentRec := RECORD
UTF8_de firstname;
UTF8_de lastname;
childRec details;
END;
nested_dataset := DATASET([{U'J\353ck', U'\353ackson', { {22, 2, ['James', 'Jonathon']}, 5.9, 600}}, {'John', 'Johnson', { {17, 0, []}, 6.3, 18}},
END;
nested_dataset := DATASET([{U'J\353ck', U'\353ackson', { {22, 2, ['James', 'Jonathon']}, 5.9, 600}}, {'John', 'Johnson', { {17, 0, []}, 6.3, 18}},
{'Amy', U'Amy\353on', { {59, 1, ['Andy']}, 3.9, 59}}, {'Grace', U'Graceso\353', { {11, 3, ['Grayson', 'Gina', 'George']}, 7.9, 100}}], parentRec);

#IF(1)
ParquetIO.Write(nested_dataset, '/datadrive/dev/test_data/nested.parquet');
#END
writeStepA := ParquetIO.Write(nested_dataset, '/datadrive/dev/test_data/nested_A.parquet');
writeStepB := ParquetIO.Write(nested_dataset, '/datadrive/dev/test_data/nested_B.parquet');

read_in_a := ParquetIO.Read(parentRec, '/datadrive/dev/test_data/nested_A.parquet');
readStepA := OUTPUT(read_in_a, NAMED('NESTED_PARQUET_IO_A'));
read_in_b := ParquetIO.Read(parentRec, '/datadrive/dev/test_data/nested_B.parquet');
readStepB := OUTPUT(read_in_b, NAMED('NESTED_PARQUET_IO_B'));

#IF(1)
read_in := ParquetIO.Read(parentRec, '/datadrive/dev/test_data/nested.parquet');
OUTPUT(read_in, NAMED('NESTED_PARQUET_IO'));
#END
SEQUENTIAL(writeStepA, writeStepB, readStepA, readStepB);
26 changes: 26 additions & 0 deletions plugins/parquet/examples/taxi_data.ecl
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
IMPORT PARQUET;

EXPORT Layout := RECORD
STRING VendorID;
STRING tpep_pickup_datetime;
STRING tpep_dropoff_datetime;
STRING passenger_count;
STRING trip_distance;
STRING RatecodeID;
STRING store_and_fwd_flag;
STRING PULocationID;
STRING DOLocationID;
STRING payment_type;
STRING fare_amount;
STRING extra;
STRING mta_tax;
STRING tip_amount;
STRING tolls_amount;
STRING improvement_surcharge;
STRING total_amount;
END;

tripData := '/datadrive/dev/test_data/yellow_tripdata_2017-01.parquet';
read_in := ParquetIO.Read(Layout, tripData);
COUNT(read_in);
OUTPUT(CHOOSEN(read_in, 100));
8 changes: 4 additions & 4 deletions plugins/parquet/parquet.ecllib
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,16 @@ EXPORT ParquetIO := MODULE
RETURN _DoParquetReadPartition();
ENDMACRO;

EXPORT Write(outDS, filePath) := MACRO
EXPORT Write(outDS, filePath) := FUNCTIONMACRO
LOCAL _DoParquetWrite(STREAMED DATASET(RECORDOF(outDS)) _ds) := EMBED(parquet : activity, option('write'), destination(filePath))
ENDEMBED;
_doParquetWrite(outDS);
RETURN _doParquetWrite(outDS);
ENDMACRO;

EXPORT WritePartition(outDS, outRows = 100000, basePath) := MACRO
EXPORT WritePartition(outDS, outRows = 100000, basePath) := FUNCTIONMACRO
LOCAL _DoParquetWritePartition(STREAMED DATASET(RECORDOF(outDS)) _ds) := EMBED(parquet : activity, option('writepartition'), destination(basePath), MaxRowSize(outRows))
ENDEMBED;
_DoParquetWritePartition(outDS)
RETURN _DoParquetWritePartition(outDS);
ENDMACRO;
END;

Expand Down
Loading

0 comments on commit 98bda80

Please sign in to comment.