Skip to content

Commit

Permalink
Merge pull request #19329 from ghalliday/issue33064
Browse files Browse the repository at this point in the history
HPCC-33064 Check files size consistency before reading

Reviewed-by: Jake Smith <[email protected]>
Merged-by: Gavin Halliday <[email protected]>
  • Loading branch information
ghalliday authored Dec 3, 2024
2 parents 3357598 + 116c785 commit 7f3f269
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 0 deletions.
72 changes: 72 additions & 0 deletions dali/base/dafdesc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,10 @@ class CFileDescriptorBase: public CInterface
virtual IFileDescriptor &querySelf() = 0;
virtual unsigned copyClusterNum(unsigned partidx, unsigned copy,unsigned *replicate=NULL) = 0;

IPropertyTree & queryAttributes() const
{
return *attr;
}
};

class CPartDescriptor : implements IPartDescriptor
Expand Down Expand Up @@ -819,6 +823,74 @@ protected: friend class CFileDescriptor;
return ismulti;
}

IPropertyTree & queryAttributes() const
{
return *props;
}

offset_t getFileSize(bool allowphysical,bool forcephysical)
{
offset_t ret = (offset_t)((forcephysical&&allowphysical)?-1:queryAttributes().getPropInt64("@size", -1));
if (allowphysical&&(ret==(offset_t)-1))
ret = getSize(true);
return ret;
}

offset_t getDiskSize(bool allowphysical,bool forcephysical)
{
if (!::isCompressed(parent.queryAttributes()))
return getFileSize(allowphysical, forcephysical);

if (forcephysical && allowphysical)
return getSize(false); // i.e. only if force, because all compressed should have @compressedSize attribute

// NB: compressSize is disk size
return queryAttributes().getPropInt64("@compressedSize", -1);
}

offset_t getSize(bool checkCompressed)
{
offset_t ret = (offset_t)-1;
StringBuffer firstname;
bool compressed = ::isCompressed(parent.queryAttributes());
unsigned nc=parent.numCopies(partIndex);
for (unsigned copy=0;copy<nc;copy++)
{
RemoteFilename rfn;
try
{
Owned<IFile> partfile = createIFile(getFilename(copy, rfn));
if (checkCompressed && compressed)
{
Owned<IFileIO> partFileIO = partfile->open(IFOread);
if (partFileIO)
{
Owned<ICompressedFileIO> compressedIO = createCompressedFileReader(partFileIO);
if (compressedIO)
ret = compressedIO->size();
else
throw makeStringExceptionV(DFSERR_PhysicalCompressedPartInvalid, "Compressed part is not in the valid format: %s", partfile->queryFilename());
}
}
else
ret = partfile->size();
if (ret!=(offset_t)-1)
return ret;
}
catch (IException *e)
{
StringBuffer s("CDistributedFilePart::getSize ");
rfn.getRemotePath(s);
EXCLOG(e, s.str());
e->Release();
}
if (copy==0)
rfn.getRemotePath(firstname);
}
throw makeStringExceptionV(DFSERR_CannotFindPartFileSize, "Cannot find physical file size for %s", firstname.str());;
}


RemoteMultiFilename &getMultiFilename(unsigned copy, RemoteMultiFilename &rmfn)
{
if (ismulti) {
Expand Down
3 changes: 3 additions & 0 deletions dali/base/dafdesc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,9 @@ interface IPartDescriptor: extends IInterface
virtual const char *queryOverrideName() = 0; // for non-standard files
virtual unsigned copyClusterNum(unsigned copy,unsigned *replicate=NULL)=0; // map copy number to cluster (and optionally replicate number)
virtual IReplicatedFile *getReplicatedFile()=0;

virtual offset_t getFileSize(bool allowphysical,bool forcephysical)=0; // gets the part filesize (NB this will be the *expanded* size)
virtual offset_t getDiskSize(bool allowphysical,bool forcephysical)=0; // gets the part size on disk (NB this will be the compressed size)
};
typedef IArrayOf<IPartDescriptor> CPartDescriptorArray;
typedef IIteratorOf<IPartDescriptor> IPartDescriptorIterator;
Expand Down
11 changes: 11 additions & 0 deletions ecl/hthor/hthor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8735,6 +8735,17 @@ bool CHThorDiskReadBaseActivity::openNext()
{
inputfile.setown(createIFile(rfilename));

if (curPart)
{
offset_t expectedSize = curPart->getDiskSize(false, false);
if (expectedSize != unknownFileSize)
{
offset_t actualSize = inputfile->size();
if(actualSize != expectedSize)
throw MakeStringException(0, "File size mismatch: file %s was supposed to be %" I64F "d bytes but appears to be %" I64F "d bytes", inputfile->queryFilename(), expectedSize, actualSize);
}
}

if (compressed)
{
Owned<IExpander> eexp;
Expand Down
2 changes: 2 additions & 0 deletions roxie/ccd/ccdfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3909,6 +3909,8 @@ class CcdFileTest : public CppUnit::TestFixture
virtual const char *queryOverrideName() { UNIMPLEMENTED; }
virtual unsigned copyClusterNum(unsigned copy,unsigned *replicate=NULL) { UNIMPLEMENTED; }
virtual IReplicatedFile *getReplicatedFile() { UNIMPLEMENTED; }
virtual offset_t getFileSize(bool allowphysical,bool forcephysical) { UNIMPLEMENTED; }
virtual offset_t getDiskSize(bool allowphysical,bool forcephysical) { UNIMPLEMENTED; }
};

void testCopy()
Expand Down
8 changes: 8 additions & 0 deletions thorlcr/activities/diskread/thdiskreadslave.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,14 @@ void CDiskRecordPartHandler::open()

rwFlags |= DEFAULT_RWFLAGS;

offset_t expectedSize = partDesc->getDiskSize(false, false);
if (expectedSize != unknownFileSize)
{
offset_t actualSize = iFile->size();
if(actualSize != expectedSize)
throw MakeStringException(0, "File size mismatch: file %s was supposed to be %" I64F "d bytes but appears to be %" I64F "d bytes", iFile->queryFilename(), expectedSize, actualSize);
}

if (compressed)
{
rwFlags |= rw_compress;
Expand Down

0 comments on commit 7f3f269

Please sign in to comment.