From 116c785330464c378ca1226e6b198aa216ad5f45 Mon Sep 17 00:00:00 2001 From: Gavin Halliday Date: Tue, 3 Dec 2024 12:07:54 +0000 Subject: [PATCH] HPCC-33064 Check files size consistency before reading Signed-off-by: Gavin Halliday --- dali/base/dafdesc.cpp | 72 +++++++++++++++++++ dali/base/dafdesc.hpp | 3 + ecl/hthor/hthor.cpp | 11 +++ roxie/ccd/ccdfile.cpp | 2 + .../activities/diskread/thdiskreadslave.cpp | 8 +++ 5 files changed, 96 insertions(+) diff --git a/dali/base/dafdesc.cpp b/dali/base/dafdesc.cpp index b03967a19c2..da66231570a 100644 --- a/dali/base/dafdesc.cpp +++ b/dali/base/dafdesc.cpp @@ -665,6 +665,10 @@ class CFileDescriptorBase: public CInterface virtual IFileDescriptor &querySelf() = 0; virtual unsigned copyClusterNum(unsigned partidx, unsigned copy,unsigned *replicate=NULL) = 0; + IPropertyTree & queryAttributes() const + { + return *attr; + } }; class CPartDescriptor : implements IPartDescriptor @@ -819,6 +823,74 @@ protected: friend class CFileDescriptor; return ismulti; } + IPropertyTree & queryAttributes() const + { + return *props; + } + + offset_t getFileSize(bool allowphysical,bool forcephysical) + { + offset_t ret = (offset_t)((forcephysical&&allowphysical)?-1:queryAttributes().getPropInt64("@size", -1)); + if (allowphysical&&(ret==(offset_t)-1)) + ret = getSize(true); + return ret; + } + + offset_t getDiskSize(bool allowphysical,bool forcephysical) + { + if (!::isCompressed(parent.queryAttributes())) + return getFileSize(allowphysical, forcephysical); + + if (forcephysical && allowphysical) + return getSize(false); // i.e. only if force, because all compressed should have @compressedSize attribute + + // NB: compressSize is disk size + return queryAttributes().getPropInt64("@compressedSize", -1); + } + + offset_t getSize(bool checkCompressed) + { + offset_t ret = (offset_t)-1; + StringBuffer firstname; + bool compressed = ::isCompressed(parent.queryAttributes()); + unsigned nc=parent.numCopies(partIndex); + for (unsigned copy=0;copy partfile = createIFile(getFilename(copy, rfn)); + if (checkCompressed && compressed) + { + Owned partFileIO = partfile->open(IFOread); + if (partFileIO) + { + Owned compressedIO = createCompressedFileReader(partFileIO); + if (compressedIO) + ret = compressedIO->size(); + else + throw makeStringExceptionV(DFSERR_PhysicalCompressedPartInvalid, "Compressed part is not in the valid format: %s", partfile->queryFilename()); + } + } + else + ret = partfile->size(); + if (ret!=(offset_t)-1) + return ret; + } + catch (IException *e) + { + StringBuffer s("CDistributedFilePart::getSize "); + rfn.getRemotePath(s); + EXCLOG(e, s.str()); + e->Release(); + } + if (copy==0) + rfn.getRemotePath(firstname); + } + throw makeStringExceptionV(DFSERR_CannotFindPartFileSize, "Cannot find physical file size for %s", firstname.str());; + } + + RemoteMultiFilename &getMultiFilename(unsigned copy, RemoteMultiFilename &rmfn) { if (ismulti) { diff --git a/dali/base/dafdesc.hpp b/dali/base/dafdesc.hpp index 535178828e2..377bfd1ab67 100644 --- a/dali/base/dafdesc.hpp +++ b/dali/base/dafdesc.hpp @@ -177,6 +177,9 @@ interface IPartDescriptor: extends IInterface virtual const char *queryOverrideName() = 0; // for non-standard files virtual unsigned copyClusterNum(unsigned copy,unsigned *replicate=NULL)=0; // map copy number to cluster (and optionally replicate number) virtual IReplicatedFile *getReplicatedFile()=0; + + virtual offset_t getFileSize(bool allowphysical,bool forcephysical)=0; // gets the part filesize (NB this will be the *expanded* size) + virtual offset_t getDiskSize(bool allowphysical,bool forcephysical)=0; // gets the part size on disk (NB this will be the compressed size) }; typedef IArrayOf CPartDescriptorArray; typedef IIteratorOf IPartDescriptorIterator; diff --git a/ecl/hthor/hthor.cpp b/ecl/hthor/hthor.cpp index 610c15507cc..06267a4908d 100644 --- a/ecl/hthor/hthor.cpp +++ b/ecl/hthor/hthor.cpp @@ -8735,6 +8735,17 @@ bool CHThorDiskReadBaseActivity::openNext() { inputfile.setown(createIFile(rfilename)); + if (curPart) + { + offset_t expectedSize = curPart->getDiskSize(false, false); + if (expectedSize != unknownFileSize) + { + offset_t actualSize = inputfile->size(); + if(actualSize != expectedSize) + throw MakeStringException(0, "File size mismatch: file %s was supposed to be %" I64F "d bytes but appears to be %" I64F "d bytes", inputfile->queryFilename(), expectedSize, actualSize); + } + } + if (compressed) { Owned eexp; diff --git a/roxie/ccd/ccdfile.cpp b/roxie/ccd/ccdfile.cpp index 9f627e91bee..e9976005597 100644 --- a/roxie/ccd/ccdfile.cpp +++ b/roxie/ccd/ccdfile.cpp @@ -3909,6 +3909,8 @@ class CcdFileTest : public CppUnit::TestFixture virtual const char *queryOverrideName() { UNIMPLEMENTED; } virtual unsigned copyClusterNum(unsigned copy,unsigned *replicate=NULL) { UNIMPLEMENTED; } virtual IReplicatedFile *getReplicatedFile() { UNIMPLEMENTED; } + virtual offset_t getFileSize(bool allowphysical,bool forcephysical) { UNIMPLEMENTED; } + virtual offset_t getDiskSize(bool allowphysical,bool forcephysical) { UNIMPLEMENTED; } }; void testCopy() diff --git a/thorlcr/activities/diskread/thdiskreadslave.cpp b/thorlcr/activities/diskread/thdiskreadslave.cpp index 59d001238a0..c09718a041d 100644 --- a/thorlcr/activities/diskread/thdiskreadslave.cpp +++ b/thorlcr/activities/diskread/thdiskreadslave.cpp @@ -366,6 +366,14 @@ void CDiskRecordPartHandler::open() rwFlags |= DEFAULT_RWFLAGS; + offset_t expectedSize = partDesc->getDiskSize(false, false); + if (expectedSize != unknownFileSize) + { + offset_t actualSize = iFile->size(); + if(actualSize != expectedSize) + throw MakeStringException(0, "File size mismatch: file %s was supposed to be %" I64F "d bytes but appears to be %" I64F "d bytes", iFile->queryFilename(), expectedSize, actualSize); + } + if (compressed) { rwFlags |= rw_compress;